#include <string.h>
#include <stdio.h>
#include <assert.h>
typedef unsigned int UInt;
typedef signed int Int;
typedef unsigned char UChar;
typedef unsigned long long int ULong;
typedef UChar Bool;
#define False ((Bool)0)
#define True ((Bool)1)
//typedef unsigned char V128[16];
/* A 128-bit value as held in one XMM register.  The same 16 bytes can
   be viewed either byte-by-byte (uChar) or as four UInt words (uInt). */
typedef union {
   UChar uChar[16];
   UInt  uInt[4];
} V128;
static UChar fromhex(char x) {
if (x >= '0' && x <= '9') { return(x - '0'); }
else if (x >= 'A' && x <= 'F') { return(x - 'A' + 10); }
else if (x >= 'a' && x <= 'f') { return(x - 'a' + 10); }
else assert(0);
/* Decode a 32-digit hexadecimal string into a 128-bit value.

   dst      receives the 16 decoded bytes.
   summary  exactly 32 hex digits (checked with assert); not modified.

   The string is consumed from its END: the last two characters form
   dst->uChar[0], the two before them dst->uChar[1], and so on, so the
   first two characters end up in dst->uChar[15].  Within each pair the
   left character is the high nibble.  A non-hex character is rejected
   by fromhex. */
static void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 32 );
   for (i = 0; i < 16; i++) {
      const UChar lo = fromhex(summary[31 - 2*i]);
      const UChar hi = fromhex(summary[31 - 2*i - 1]);
      assert(lo < 16);
      assert(hi < 16);
      dst->uChar[i] = (UChar)((hi << 4) | lo);
   }
}
/* Map a nibble value to its lower-case hexadecimal digit character:
   values below 10 give '0'..'9', the rest continue from 'a'.  No range
   check is done; callers pass values already masked to four bits. */
static int tohex (int nib)
{
   const int base = (nib < 10) ? '0' : ('a' - 10);
   return base + nib;
}
/* Format a 128-bit value as 32 lower-case hex digits plus a NUL.

   dst      the value to print (read only, despite the name).
   summary  output buffer; 33 bytes are written.

   Bytes are emitted in memory order, dst->uChar[0] first, high nibble
   before low nibble.  Note this is the reverse of the byte order in
   which expand consumes its input string. */
static void unexpand ( V128* dst, char* summary )
{
   char*        out  = summary;
   const UChar* byte = dst->uChar;
   const UChar* const end = byte + 16;

   for (; byte != end; byte++) {
      *out++ = tohex((*byte >> 4) & 0xf);
      *out++ = tohex(*byte & 0xf);
   }
   *out = 0;
}
/* Execute one AESDEC instruction and print its result.

   s_argL  32 hex digits: the State, loaded into xmm1.
   s_argR  32 hex digits: the Round key, loaded into xmm2.
   s_exp   32 hex digits of expected result, or "" to skip the check.

   The result is delivered in xmm1.  If s_exp is non-empty the result
   must match it (assert).  Always prints
   "aesdec <argL> <argR> result <res>" on stdout. */
static void AESDEC(char *s_argL, char *s_argR, char *s_exp)
{
   V128 argL, argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + terminating NUL */

   expand(&argL, s_argL);
   expand(&argR, s_argR);

   /* %rsp is left untouched: the sequence neither pushes nor calls, and
      the "m" operands below may be addressed relative to %rsp, in which
      case moving it would make them refer to the wrong memory. */
   __asm__ __volatile__(
      "movdqu %1, %%xmm1"        "\n\t"
      "movdqu %2, %%xmm2"        "\n\t"
      "aesdec %%xmm2, %%xmm1"    "\n\t"
      "movdqu %%xmm1, %0"        "\n\t"
      : /*out*/   "=m"(res)
      : /*in*/    "m"(argL), "m"(argR)
      : /*trash*/ "xmm1", "xmm2"
   );

   if (strlen(s_exp) > 0) {
      V128 expected;
      expand(&expected, s_exp);
      assert (0 == memcmp(&res, &expected, 16));
   }
   unexpand (&res, s_res);
   printf ("aesdec %s %s result %s\n", s_argL, s_argR, s_res);
}
/* Execute one AESDECLAST instruction and print its result.

   s_argL  32 hex digits: the State, loaded into xmm1.
   s_argR  32 hex digits: the Round key, loaded into xmm2.
   s_exp   32 hex digits of expected result, or "" to skip the check.

   The result is delivered in xmm1.  If s_exp is non-empty the result
   must match it (assert).  Always prints
   "aesdeclast <argL> <argR> result <res>" on stdout. */
static void AESDECLAST(char *s_argL, char *s_argR, char *s_exp)
{
   V128 argL, argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + terminating NUL */

   expand(&argL, s_argL);
   expand(&argR, s_argR);

   /* %rsp is left untouched: the sequence neither pushes nor calls, and
      the "m" operands below may be addressed relative to %rsp, in which
      case moving it would make them refer to the wrong memory. */
   __asm__ __volatile__(
      "movdqu %1, %%xmm1"            "\n\t"
      "movdqu %2, %%xmm2"            "\n\t"
      "aesdeclast %%xmm2, %%xmm1"    "\n\t"
      "movdqu %%xmm1, %0"            "\n\t"
      : /*out*/   "=m"(res)
      : /*in*/    "m"(argL), "m"(argR)
      : /*trash*/ "xmm1", "xmm2"
   );

   if (strlen(s_exp) > 0) {
      V128 expected;
      expand(&expected, s_exp);
      assert (0 == memcmp(&res, &expected, 16));
   }
   unexpand (&res, s_res);
   printf ("aesdeclast %s %s result %s\n", s_argL, s_argR, s_res);
}
/* Execute one AESENC instruction and print its result.

   s_argL  32 hex digits: the State, loaded into xmm1.
   s_argR  32 hex digits: the Round key, loaded into xmm2.
   s_exp   32 hex digits of expected result, or "" to skip the check.

   The result is delivered in xmm1.  If s_exp is non-empty the result
   must match it (assert).  Always prints
   "aesenc <argL> <argR> result <res>" on stdout. */
static void AESENC(char *s_argL, char *s_argR, char *s_exp)
{
   V128 argL, argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + terminating NUL */

   expand(&argL, s_argL);
   expand(&argR, s_argR);

   /* %rsp is left untouched: the sequence neither pushes nor calls, and
      the "m" operands below may be addressed relative to %rsp, in which
      case moving it would make them refer to the wrong memory. */
   __asm__ __volatile__(
      "movdqu %1, %%xmm1"        "\n\t"
      "movdqu %2, %%xmm2"        "\n\t"
      "aesenc %%xmm2, %%xmm1"    "\n\t"
      "movdqu %%xmm1, %0"        "\n\t"
      : /*out*/   "=m"(res)
      : /*in*/    "m"(argL), "m"(argR)
      : /*trash*/ "xmm1", "xmm2"
   );

   if (strlen(s_exp) > 0) {
      V128 expected;
      expand(&expected, s_exp);
      assert (0 == memcmp(&res, &expected, 16));
   }
   unexpand (&res, s_res);
   printf ("aesenc %s %s result %s\n", s_argL, s_argR, s_res);
}
/* Execute one AESENCLAST instruction and print its result.

   s_argL  32 hex digits: the State, loaded into xmm1.
   s_argR  32 hex digits: the Round key, loaded into xmm2.
   s_exp   32 hex digits of expected result, or "" to skip the check.

   The result is delivered in xmm1.  If s_exp is non-empty the result
   must match it (assert).  Always prints
   "aesenclast <argL> <argR> result <res>" on stdout. */
static void AESENCLAST(char *s_argL, char *s_argR, char *s_exp)
{
   V128 argL, argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + terminating NUL */

   expand(&argL, s_argL);
   expand(&argR, s_argR);

   /* %rsp is left untouched: the sequence neither pushes nor calls, and
      the "m" operands below may be addressed relative to %rsp, in which
      case moving it would make them refer to the wrong memory. */
   __asm__ __volatile__(
      "movdqu %1, %%xmm1"            "\n\t"
      "movdqu %2, %%xmm2"            "\n\t"
      "aesenclast %%xmm2, %%xmm1"    "\n\t"
      "movdqu %%xmm1, %0"            "\n\t"
      : /*out*/   "=m"(res)
      : /*in*/    "m"(argL), "m"(argR)
      : /*trash*/ "xmm1", "xmm2"
   );

   if (strlen(s_exp) > 0) {
      V128 expected;
      expand(&expected, s_exp);
      assert (0 == memcmp(&res, &expected, 16));
   }
   unexpand (&res, s_res);
   printf ("aesenclast %s %s result %s\n", s_argL, s_argR, s_res);
}
/* Execute one AESIMC instruction and print its result.

   s_argR  32 hex digits: the Round key to transform.
   s_exp   32 hex digits of expected result, or "" to skip the check.

   Unlike the other AES helpers in this file, this one tests another
   way of passing the input: aesimc reads its source operand directly
   from memory instead of from a register loaded with movdqu.  The
   result is delivered in xmm5.  If s_exp is non-empty the result must
   match it (assert).  Always prints "aesimc <argR> result <res>". */
static void AESIMC(char *s_argR, char *s_exp)
{
   /* aesimc with a memory source is a legacy-SSE access, which faults
      unless the operand is 16-byte aligned; V128's natural alignment
      is only that of UInt, so request the alignment explicitly. */
   V128 argR __attribute__((aligned(16)));
   V128 res;
   char s_res[33];   /* 32 hex digits + terminating NUL */

   expand(&argR, s_argR);

   /* %rsp is left untouched: the sequence neither pushes nor calls, and
      the "m" operands below may be addressed relative to %rsp, in which
      case moving it would make them refer to the wrong memory. */
   __asm__ __volatile__(
      "aesimc %1, %%xmm5"        "\n\t"
      "movdqu %%xmm5, %0"        "\n\t"
      : /*out*/   "=m"(res)
      : /*in*/    "m"(argR)
      : /*trash*/ "xmm5"
   );

   if (strlen(s_exp) > 0) {
      V128 expected;
      expand(&expected, s_exp);
      assert (0 == memcmp(&res, &expected, 16));
   }
   unexpand (&res, s_res);
   printf ("aesimc %s result %s\n", s_argR, s_res);
}
/* Emit the aeskeygenassist test sequence for one literal imm8.  The
   immediate is part of the instruction text, so every supported value
   needs its own expansion.  Uses the locals argL (input, via xmm2) and
   res (output, from xmm1) of the enclosing function.

   %rsp is left untouched: the sequence neither pushes nor calls, and
   the "m" operands may be addressed relative to %rsp, in which case
   moving it would make them refer to the wrong memory. */
#define AESKEYGENASSIST_ASM(IMM8)                                      \
   __asm__ __volatile__(                                               \
      "movdqu %1, %%xmm2"                              "\n\t"          \
      "aeskeygenassist $" #IMM8 ",%%xmm2, %%xmm1"      "\n\t"          \
      "movdqu %%xmm1, %0"                              "\n\t"          \
      : /*out*/   "=m"(res)                                            \
      : /*in*/    "m"(argL)                                            \
      : /*trash*/ "xmm1", "xmm2"                                       \
   )

/* Execute one AESKEYGENASSIST instruction and print its result.

   imm     the RCON value (imm8); only 1, 2 and 8 are supported, any
           other value trips assert(0).
   s_argL  32 hex digits: the 128-bit input, loaded into xmm2.
   s_exp   32 hex digits of expected result, or "" to skip the check.

   The result is delivered in xmm1.  If s_exp is non-empty the result
   must match it (assert).  Always prints
   "aeskeygenassist <imm> <argL> result <res>" on stdout. */
static void AESKEYGENASSIST(int imm, char* s_argL, char* s_exp)
{
   V128 argL;
   V128 res;
   char s_res[33];   /* 32 hex digits + terminating NUL */

   expand(&argL, s_argL);

   switch (imm) {
      case 1: AESKEYGENASSIST_ASM(1); break;
      case 2: AESKEYGENASSIST_ASM(2); break;
      case 8: AESKEYGENASSIST_ASM(8); break;
      default:
         assert (0);
         /* Only reached with assertions disabled: res was never
            written, so there is nothing meaningful to check or print. */
         return;
   }

   if (strlen(s_exp) > 0) {
      V128 expected;
      expand(&expected, s_exp);
      assert (0 == memcmp(&res, &expected, 16));
   }
   unexpand (&res, s_res);
   printf ("aeskeygenassist %d %s result %s\n", imm, s_argL, s_res);
}

#undef AESKEYGENASSIST_ASM
/* One row of test input for the AES instruction helpers.  Both
   operands are hex strings; main stops at the first row whose argL is
   NULL. */
typedef struct Aes_Args {
   char *argL;   /* left operand; also the sole input of the one-operand tests */
   char *argR;   /* right operand */
   int   imm;    /* immediate; only used for aeskeygenassist */
} Aes_Args;
/* Just a bunch of various data to compare a native run
with a run under Valgrind. */
static const Aes_Args aes_args[] = {
int main ( void )
int i;
/* test the various instructions, using the examples provided
in "White Paper Intel Advanced Encryption Standard AES
instruction set" January 2010 (26/1/2010)
Rev. 3.0
by Shay Gueron */
/* ??? the AESIMC example given in the Intel White paper
seems wrong.
The below fails both under Valgrind and natively.
So we use the example given for the InvMixColums
transformation. */
/* and now a bunch of other calls. The below are verified
using the aes.stdout.exp (produced by a native run). */
for (i = 0; aes_args[i].argL != NULL; i++) {
AESKEYGENASSIST(aes_args[i].imm, aes_args[i].argL, "");
AESENC(aes_args[i].argL, aes_args[i].argR, "");
AESENCLAST(aes_args[i].argL, aes_args[i].argR, "");
AESDEC(aes_args[i].argL, aes_args[i].argR, "");
AESDECLAST(aes_args[i].argL, aes_args[i].argR, "");
AESIMC(aes_args[i].argL, "");
return 0;