/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
   check the core arithmetic in any detail.

   Each test case feeds one pair of 128-bit operands, plus values for
   rdx and rax, to pcmpistri, pcmpistrm, pcmpestri and pcmpestrm, each
   with the immediates 0x4A and 0x0A, and prints xmm0, the rcx slot
   and a masked copy of rflags after every instruction. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned char           V128[16];
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* Byte offsets of the fields inside the 80-byte scratch block that is
   shared between the C code and the inline assembly.  The assembly in
   RUN_PCMPXSTRX spells these offsets out as literals; the two must be
   kept in step. */
enum {
   OFF_ARGL   = 0,    /* in:    left operand, loaded into xmm2   */
   OFF_ARGR   = 16,   /* in:    right operand, loaded into xmm13 */
   OFF_RDX    = 32,   /* in:    value loaded into rdx            */
   OFF_RAX    = 40,   /* in:    value loaded into rax            */
   OFF_XMM0   = 48,   /* inout: xmm0                             */
   OFF_RCX    = 64,   /* inout: low 16 bits of rcx               */
   OFF_RFLAGS = 72,   /* out:   rflags                           */
   BLOCK_SIZE = 80
};

/* Print the 16 bytes of *vec as 32 hex digits, byte 15 first, so the
   vector reads as a single big-endian number. */
void show_V128 ( V128* vec )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (UInt)( (*vec)[i] ));
}

/* Build a 128-bit vector from a 16-character hex string.  Each hex
   digit d becomes the byte 0xdd.  The last character of the string
   describes byte 0, so the string is laid out in the same order in
   which show_V128 prints the vector.

   summary must be exactly 16 characters drawn from [0-9A-Fa-f];
   anything else trips an assertion. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = (UChar)summary[15-i];
      if      (x >= '0' && x <= '9') { xx = (UChar)(x - '0'); }
      else if (x >= 'A' && x <= 'F') { xx = (UChar)(x - 'A' + 10); }
      else if (x >= 'a' && x <= 'f') { xx = (UChar)(x - 'a' + 10); }
      else assert(0);

      assert(xx < 16);
      /* Duplicate the nibble into both halves of the byte. */
      (*dst)[i] = (UChar)((xx << 4) | xx);
   }
}

/* Reset the scratch block to a 0x55 fill pattern and store the four
   inputs at the offsets the assembly loads them from.  The xmm0, rcx
   and rflags slots keep the fill pattern, so any part of them that an
   instruction does not write still shows up as 0x55 bytes.

   The previous code stored rdxIN/raxIN/rdxIN at offsets 24/32/40.
   That overwrote the upper half of argR and delivered raxIN to rdx
   and rdxIN to rax, so the instructions did not run on the operands
   printed in the test header.
   NOTE(review): results printed by this program therefore differ from
   those of the old version; regenerate any stored reference output. */
static void fill_block ( UChar* blockC, V128* argL, V128* argR,
                         ULong rdxIN, ULong raxIN )
{
   memset(blockC, 0x55, BLOCK_SIZE);
   memcpy(blockC + OFF_ARGL, argL,   16);
   memcpy(blockC + OFF_ARGR, argR,   16);
   memcpy(blockC + OFF_RDX,  &rdxIN, 8);
   memcpy(blockC + OFF_RAX,  &raxIN, 8);
}

/* Print one result line: the label, then the xmm0, rcx and rflags
   slots of the scratch block.  The 0x8D5 mask keeps only the CF, PF,
   AF, ZF, SF and OF bits of rflags. */
static void show_results ( const char* label, ULong* block )
{
   printf("%s", label);
   printf(" xmm0 ");
   show_V128( (V128*)((UChar*)block + OFF_XMM0) );
   printf(" rcx %016llx flags %08llx\n",
          block[OFF_RCX / 8], block[OFF_RFLAGS / 8] & 0x8D5);
}

/* Run "pcmp<variant> $<imm>, %xmm2, %xmm13" on freshly loaded inputs
   and print the outcome.  This has to be a macro because the mnemonic
   and the immediate are pasted into the assembly template.  It is
   local to one_test and uses that function's block, blockC, argL,
   argR, rdxIN and raxIN directly.

   Only the low 16 bits of rcx travel through the scratch block (movw
   with %cx).  The previous code named %rcx together with the 'w'
   suffix, an operand-size mismatch. */
#define RUN_PCMPXSTRX(variant, imm)                                    \
   do {                                                                \
      fill_block(blockC, &argL, &argR, rdxIN, raxIN);                  \
      __asm__ __volatile__(                                            \
         "movupd    0(%0), %%xmm2"                        "\n\t"       \
         "movupd    16(%0), %%xmm13"                      "\n\t"       \
         "movq      32(%0), %%rdx"                        "\n\t"       \
         "movq      40(%0), %%rax"                        "\n\t"       \
         "movupd    48(%0), %%xmm0"                       "\n\t"       \
         "movw      64(%0), %%cx"                         "\n\t"       \
         "pcmp" #variant " $" #imm ", %%xmm2, %%xmm13"    "\n\t"       \
         "movupd    %%xmm0, 48(%0)"                       "\n\t"       \
         "movw      %%cx, 64(%0)"                         "\n\t"       \
         "pushfq"                                         "\n\t"       \
         "popq      %%r15"                                "\n\t"       \
         "movq      %%r15, 72(%0)"                        "\n\t"       \
         : /*out*/                                                     \
         : /*in*/    "r"(blockC)                                       \
         : /*trash*/ "memory","cc","xmm2","xmm13","xmm0",              \
                     "rdx","rax","rcx","r15"                           \
      );                                                               \
      show_results(" " #variant " $" #imm ": ", block);                \
   } while (0)

/* Run one test case.

   summL, summR: 16-digit hex summaries of the left and right operands
                 (see expand).  The left operand goes into xmm2, the
                 right one into xmm13.
   rdxIN, raxIN: values placed in rdx and rax before each instruction.
                 Per the Intel instruction reference, the explicit-
                 length forms read the length of the xmm13 operand
                 from rax and that of the xmm2 operand from rdx.

   Prints a header line describing the inputs, followed by one result
   line per instruction/immediate combination. */
void one_test ( const char* summL, ULong rdxIN,
                const char* summR, ULong raxIN )
{
   V128 argL, argR;
   expand( &argL, summL );
   expand( &argR, summR );
   printf("\n");
   printf("rdx %016llx argL ", rdxIN);
   show_V128(&argL);
   printf(" rax %016llx argR ", raxIN);
   show_V128(&argR);
   printf("\n");

   ULong block[ 2/*in:argL*/          // 0  0
                + 2/*in:argR*/        // 2  16
                + 1/*in:rdx*/         // 4  32
                + 1/*in:rax*/         // 5  40
                + 2/*inout:xmm0*/     // 6  48
                + 1/*inout:rcx*/      // 8  64
                + 1/*out:rflags*/ ];  // 9  72
   assert(sizeof(block) == BLOCK_SIZE);

   UChar* blockC = (UChar*)&block[0];

   /* Implicit-length forms. */
   RUN_PCMPXSTRX(istri, 0x4A);
   RUN_PCMPXSTRX(istri, 0x0A);
   RUN_PCMPXSTRX(istrm, 0x4A);
   RUN_PCMPXSTRX(istrm, 0x0A);

   /* Explicit-length forms. */
   RUN_PCMPXSTRX(estri, 0x4A);
   RUN_PCMPXSTRX(estri, 0x0A);
   RUN_PCMPXSTRX(estrm, 0x4A);
   RUN_PCMPXSTRX(estrm, 0x0A);
}

#undef RUN_PCMPXSTRX

/* Drive one_test over a fixed list of operand and rdx/rax values.
   Negative literals are converted to ULong on the way in. */
int main ( void )
{
   /* Operands with and without zero bytes, both register values 0. */
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
   one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );

   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );

   /* rdx fixed at 5, rax swept around +/-16. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );

   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );

   /* rax fixed at 6, rdx swept around +/-16. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );

   one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );

   return 0;
}