/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
pcmpistri to drive it. Does not check the e-vs-i or i-vs-m
aspect. */
#include <string.h>
#include <stdio.h>
#include <assert.h>
/* Basic scalar aliases used throughout this file. */
typedef unsigned int UInt;
typedef signed int Int;
typedef unsigned char UChar;
typedef unsigned long long int ULong;
/* Boolean stored in a single unsigned char; use False/True below. */
typedef UChar Bool;
#define False ((Bool)0)
#define True ((Bool)1)
//typedef unsigned char V128[16];
/* A 128-bit vector value, viewable either as 16 bytes or as four
   32-bit words.  Both views overlay the same 16 bytes of storage. */
typedef
union {
UChar uChar[16];
UInt uInt[4];
}
V128;
/* Bit positions of the O, S, Z, A, C and P condition flags within a
   flags word. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2
/* Single-bit masks for the same flags.  OR-ed together they give
   0x8D5, which is the mask the h_pcmpistri_* functions apply to the
   flags word they capture. */
#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)
/* Count the leading zero bits of a 32-bit value.  Returns 32 when x
   is zero, otherwise the number of zero bits above the most
   significant set bit (0 .. 31). */
UInt clz32 ( UInt x )
{
   UInt zeros = 0;

   if (x == 0)
      return 32;

   /* Binary search: whenever the top half of the remaining window is
      empty, account for it and slide the interesting bits upwards. */
   if ((x & 0xFFFF0000u) == 0) { zeros += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { zeros += 8;  x <<= 8;  }
   if ((x & 0xF0000000u) == 0) { zeros += 4;  x <<= 4;  }
   if ((x & 0xC0000000u) == 0) { zeros += 2;  x <<= 2;  }
   if ((x & 0x80000000u) == 0) { zeros += 1; }

   return zeros;
}
/* Count the trailing zero bits of a 32-bit value.  Returns 32 when x
   is zero. */
UInt ctz32 ( UInt x )
{
   /* (x-1) & ~x has a 1 in exactly those positions that lie below the
      lowest set bit of x (all 32 positions when x is zero). */
   const UInt belowLowest = (x - 1) & ~x;
   const UInt leading     = clz32(belowLowest);
   return 32 - leading;
}
/* Build a 16-byte vector from a 16-character hex summary.  The last
   character of the summary describes byte 0, the first character
   describes byte 15.  Each hex digit d (either case) becomes the byte
   0xdd.  Asserts if the summary is not exactly 16 characters long or
   contains a non-hex character. */
void expand ( V128* dst, char* summary )
{
   Int   lane;
   char* cursor;

   assert( strlen(summary) == 16 );

   /* Walk the string backwards while filling the vector forwards. */
   cursor = summary + 16;
   for (lane = 0; lane < 16; lane++) {
      const UChar digit  = (UChar)*--cursor;
      UChar       nibble = 0;

      if ('0' <= digit && digit <= '9') {
         nibble = digit - '0';
      } else if ('A' <= digit && digit <= 'F') {
         nibble = digit - 'A' + 10;
      } else if ('a' <= digit && digit <= 'f') {
         nibble = digit - 'a' + 10;
      } else {
         assert(0);
      }
      assert(nibble < 16);

      /* Replicate the nibble into both halves of the byte. */
      dst->uChar[lane] = (UChar)(nibble * 0x11);
   }
}
/* Run one test vector.  Expands both summaries into vectors, calls
   the hardware function h_fn and then the software function s_fn on
   them, and prints one line with both results.  The line ends in
   "!!!!" when the two results differ.  'which' is the two-character
   tag printed to identify the imm8 variant. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   V128        vecL, vecR;
   UInt        fromHardware, fromSoftware;
   const char* marker;

   assert(strlen(which) == 2);

   expand(&vecL, summL);
   expand(&vecR, summR);

   fromHardware = h_fn(&vecL, &vecR);
   fromSoftware = s_fn(&vecL, &vecR);

   marker = (fromHardware == fromSoftware) ? "" : "!!!!";
   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, fromHardware, fromSoftware, marker);
}
/* Compute the 16-bit zero mask of a vector: bit i of the result is 1
   exactly when byte i of *arg is zero.  Bits 31:16 are always zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt mask = 0;
   UInt lane = 16;

   while (lane-- > 0) {
      if (arg->uChar[lane] == 0)
         mask |= 1u << lane;
   }
   return mask;
}
//////////////////////////////////////////////////////////
// //
// GENERAL //
// //
//////////////////////////////////////////////////////////
/* Final stage shared by all the index-returning (I format) cases.
   Takes the intermediate comparison result intRes1 of a pcmpXstrX
   operation, applies the polarity selected by 'pol', and produces:

   - the index value for ECX, written to the first 32 bits of *resV:
     16 if the polarised result is zero, else the index of its most
     significant set bit (idx == 1) or least significant set bit
     (idx == 0);
   - the new OSZACP flags, written to *resOSZACP.  A and P are always
     zero.

   pol must be 0 .. 3 and idx must be 0 or 1.
*/
static
void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
                                    /*OUT*/UInt* resOSZACP,
                                    UInt intRes1,
                                    UInt zmaskL, UInt zmaskR,
                                    UInt validL,
                                    UInt pol, UInt idx )
{
   UInt polarised = 0;
   UInt ecx;
   UInt flags = 0;

   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   /* Polarity: 0 (+) and 2 (m+) pass the result through, 1 (-)
      inverts every bit, 3 (m-) inverts only the valid positions. */
   if (pol == 0 || pol == 2) {
      polarised = intRes1;
   } else if (pol == 1) {
      polarised = ~intRes1;
   } else if (pol == 3) {
      polarised = intRes1 ^ validL;
   }
   polarised &= 0xFFFF;

   /* Index result for ECX. */
   if (polarised == 0) {
      ecx = 16;
   } else if (idx) {
      ecx = 31 - clz32(polarised);   /* most significant 1 bit */
   } else {
      ecx = ctz32(polarised);        /* least significant 1 bit */
   }
   resV->uInt[0] = ecx;

   /* Flags, common to all ISTRI and ISTRM cases. */
   if (polarised != 0) flags |= (UInt)MASK_C;  /* C == 0 iff result is 0 */
   if (zmaskL != 0)    flags |= (UInt)MASK_Z;  /* Z == 1 iff argL has a zero */
   if (zmaskR != 0)    flags |= (UInt)MASK_S;  /* S == 1 iff argR has a zero */
   if (polarised & 1)  flags |= (UInt)MASK_O;  /* O == bit 0 of the result */
   *resOSZACP = flags;
}
/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
variants.
For xSTRI variants, the new ECX value is placed in the 32 bits
pointed to by *resV. For xSTRM variants, the result is a 128 bit
value and is placed at *resV in the obvious way.
For all variants, the new OSZACP value is placed at *resOSZACP.
argLV and argRV are the vector args. The caller must prepare a
16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
must be 1 for each zero byte of the respective arg. For ESTRx
variants this is derived from the explicit length indication, and
must be 0 in all places except at the bit index corresponding to
the valid length (0 .. 16). If the valid length is 16 then the
mask must be all zeroes. In all cases, bits 31:16 must be zero.
imm8 is the original immediate from the instruction. isSTRM
indicates whether this is a xSTRM or xSTRI variant, which controls
how much of *resV is written.
If the given imm8 case can be handled, the return value is True.
If not, False is returned, and neither *resV nor *resOSZACP are
altered.
Note: as written, every handled path below requires !isSTRM, so a
call with isSTRM set always returns False.
*/
Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
/*OUT*/UInt* resOSZACP,
V128* argLV, V128* argRV,
UInt zmaskL, UInt zmaskR,
UInt imm8, Bool isSTRM )
{
// Caller contract checks: imm8 is at most 7 bits, and both masks
// are confined to bits 15:0.
assert(imm8 < 0x80);
assert((zmaskL >> 16) == 0);
assert((zmaskR >> 16) == 0);
/* Explicitly reject any imm8 values that haven't been validated,
even if they would probably work. Life is too short to have
unvalidated cases in the code base. */
switch (imm8) {
case 0x00:
case 0x02: case 0x08: case 0x0C: case 0x12: case 0x1A:
case 0x38: case 0x3A: case 0x44: case 0x4A:
break;
default:
return False;
}
// Split imm8 into its control fields.
UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
// In every case below, validL/validR are computed as
// ~(zmask | -zmask): a 1 in each bit position strictly below the
// lowest set bit of the zero mask, i.e. the positions before the
// first terminator.  When the mask is zero this is all ones.
/*----------------------------------------*/
/*-- strcmp on byte data --*/
/*----------------------------------------*/
if (agg == 2/*equal each, aka strcmp*/
&& (fmt == 0/*ub*/ || fmt == 2/*sb*/)
&& !isSTRM) {
Int i;
UChar* argL = (UChar*)argLV;
UChar* argR = (UChar*)argRV;
UInt boolResII = 0;
// Build a 16-bit map of bytewise equality.  Iterating from byte 15
// down while shifting left leaves byte i's result in bit i.
for (i = 15; i >= 0; i--) {
UChar cL = argL[i];
UChar cR = argR[i];
boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
}
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
// do invalidation, common to all equal-each cases
UInt intRes1
= (boolResII & validL & validR) // if both valid, use cmpres
| (~ (validL | validR)); // if both invalid, force 1
// else force 0
intRes1 &= 0xFFFF;
// generate I-format output
pcmpXstrX_WRK_gen_output_fmt_I(
resV, resOSZACP,
intRes1, zmaskL, zmaskR, validL, pol, idx
);
return True;
}
/*----------------------------------------*/
/*-- set membership on byte data --*/
/*----------------------------------------*/
if (agg == 0/*equal any, aka find chars in a set*/
&& (fmt == 0/*ub*/ || fmt == 2/*sb*/)
&& !isSTRM) {
/* argL: the string, argR: charset */
UInt si, ci;
UChar* argL = (UChar*)argLV;
UChar* argR = (UChar*)argRV;
UInt boolRes = 0;
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
// For each valid string byte, bit si of boolRes is set iff that
// byte equals some valid byte of the charset.
for (si = 0; si < 16; si++) {
if ((validL & (1 << si)) == 0)
// run off the end of the string.
break;
UInt m = 0;
for (ci = 0; ci < 16; ci++) {
// stop at the end of the charset
if ((validR & (1 << ci)) == 0) break;
if (argR[ci] == argL[si]) { m = 1; break; }
}
boolRes |= (m << si);
}
// boolRes is "pre-invalidated"
UInt intRes1 = boolRes & 0xFFFF;
// generate I-format output
pcmpXstrX_WRK_gen_output_fmt_I(
resV, resOSZACP,
intRes1, zmaskL, zmaskR, validL, pol, idx
);
return True;
}
/*----------------------------------------*/
/*-- substring search on byte data --*/
/*----------------------------------------*/
if (agg == 3/*equal ordered, aka substring search*/
&& (fmt == 0/*ub*/ || fmt == 2/*sb*/)
&& !isSTRM) {
/* argL: haystack, argR: needle */
UInt ni, hi;
UChar* argL = (UChar*)argLV;
UChar* argR = (UChar*)argRV;
UInt boolRes = 0;
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
// For each haystack offset hi, bit hi of boolRes is set iff the
// needle matches there.  A needle that reaches its own end, or runs
// past byte 15 of the haystack, without a mismatch counts as a
// match.
for (hi = 0; hi < 16; hi++) {
UInt m = 1;
for (ni = 0; ni < 16; ni++) {
// end of needle: everything so far matched
if ((validR & (1 << ni)) == 0) break;
UInt i = ni + hi;
// ran past the last haystack byte: treat as a match so far
if (i >= 16) break;
if (argL[i] != argR[ni]) { m = 0; break; }
}
boolRes |= (m << hi);
// Note the order: the result for offset hi is recorded before the
// end-of-haystack test, so the first invalid haystack position is
// still evaluated once.
if ((validL & (1 << hi)) == 0)
// run off the end of the haystack
break;
}
// boolRes is "pre-invalidated"
UInt intRes1 = boolRes & 0xFFFF;
// generate I-format output
pcmpXstrX_WRK_gen_output_fmt_I(
resV, resOSZACP,
intRes1, zmaskL, zmaskR, validL, pol, idx
);
return True;
}
/*----------------------------------------*/
/*-- ranges, unsigned byte data --*/
/*----------------------------------------*/
if (agg == 1/*ranges*/
&& fmt == 0/*ub*/
&& !isSTRM) {
/* argL: string, argR: range-pairs */
UInt ri, si;
UChar* argL = (UChar*)argLV;
UChar* argR = (UChar*)argRV;
UInt boolRes = 0;
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
// For each valid string byte, bit si of boolRes is set iff the byte
// lies within some (lo, hi) pair taken from consecutive bytes of
// argR.  Bounds are inclusive and compared as unsigned bytes.
for (si = 0; si < 16; si++) {
if ((validL & (1 << si)) == 0)
// run off the end of the string
break;
UInt m = 0;
for (ri = 0; ri < 16; ri += 2) {
// a pair is usable only if both of its bytes are valid
if ((validR & (3 << ri)) != (3 << ri)) break;
if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
m = 1; break;
}
}
boolRes |= (m << si);
}
// boolRes is "pre-invalidated"
UInt intRes1 = boolRes & 0xFFFF;
// generate I-format output
pcmpXstrX_WRK_gen_output_fmt_I(
resV, resOSZACP,
intRes1, zmaskL, zmaskR, validL, pol, idx
);
return True;
}
// No handler matched this combination of fields (this includes every
// call with isSTRM set): report "not handled" without touching the
// outputs.
return False;
}
//////////////////////////////////////////////////////////
// //
// ISTRI_4A //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x4A: executes a real PCMPISTRI $0x4A
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX. */
UInt h_pcmpistri_4A ( V128* argL, V128* argR )
{
// Copy both operands into one contiguous block so a single base
// register (%2) reaches them: argL at offset 0, argR at offset 16.
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
// argL's copy is loaded into xmm2 and argR's copy into xmm11.  After
// the instruction, the flags are captured via pushfq/popq into rdx.
// The subq/addq pair lowers rsp by 1024 bytes around the sequence --
// presumably so that pushfq cannot overwrite compiler-owned stack
// data just below rsp (NOTE(review): confirm intent).
__asm__ __volatile__(
"subq $1024, %%rsp" "\n\t"
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t"
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
// Same packing as the s_pcmpistri_* functions: flags above, index below.
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x4A.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x4A, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_4A ( void )
{
char* wot = "4A";
UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_3A //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x3A: executes a real PCMPISTRI $0x3A
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX. */
UInt h_pcmpistri_3A ( V128* argL, V128* argR )
{
// Copy both operands into one contiguous block so a single base
// register (%2) reaches them: argL at offset 0, argR at offset 16.
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
// argL's copy is loaded into xmm2 and argR's copy into xmm11.  After
// the instruction, the flags are captured via pushfq/popq into rdx.
// The subq/addq pair lowers rsp by 1024 bytes around the sequence --
// presumably so that pushfq cannot overwrite compiler-owned stack
// data just below rsp (NOTE(review): confirm intent).
__asm__ __volatile__(
"subq $1024, %%rsp" "\n\t"
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t"
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
// Same packing as the s_pcmpistri_* functions: flags above, index below.
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x3A.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x3A, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_3A ( void )
{
char* wot = "3A";
UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_0C //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x0C: executes a real PCMPISTRI $0x0C
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX.

   The sequence captures the flags with pushfq, which stores 8 bytes
   just below rsp.  On x86-64 the compiler is allowed to keep live
   data in the red zone below rsp in a leaf function, so rsp is first
   moved down by 1024 bytes and restored afterwards, exactly as the
   other h_pcmpistri_* functions in this file do. */
__attribute__((noinline))
UInt h_pcmpistri_0C ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous block so a single base
      register (%2) reaches them: argL at offset 0, argR at offset 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
      //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
      //"movd %%xmm0, %%ecx" "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Same packing as the s_pcmpistri_* functions: flags above, index
      below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x0C.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x0C, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_0C ( void )
{
char* wot = "0C";
UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
try_istri(wot,h,s, "1111111111111234", "0000000000000000");
try_istri(wot,h,s, "1111111111111234", "0000000000000001");
try_istri(wot,h,s, "1111111111111234", "0000000000000011");
try_istri(wot,h,s, "1111111111111234", "1111111111111234");
try_istri(wot,h,s, "a111111111111111", "000000000000000a");
try_istri(wot,h,s, "b111111111111111", "000000000000000a");
try_istri(wot,h,s, "b111111111111111", "0000000000000000");
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_08 //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x08: executes a real PCMPISTRI $0x08
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX. */
UInt h_pcmpistri_08 ( V128* argL, V128* argR )
{
// Copy both operands into one contiguous block so a single base
// register (%2) reaches them: argL at offset 0, argR at offset 16.
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
// argL's copy is loaded into xmm2 and argR's copy into xmm11.  After
// the instruction, the flags are captured via pushfq/popq into rdx.
// The subq/addq pair lowers rsp by 1024 bytes around the sequence --
// presumably so that pushfq cannot overwrite compiler-owned stack
// data just below rsp (NOTE(review): confirm intent).
__asm__ __volatile__(
"subq $1024, %%rsp" "\n\t"
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t"
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
// Same packing as the s_pcmpistri_* functions: flags above, index below.
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x08.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x08, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_08 ( void )
{
char* wot = "08";
UInt(*h)(V128*,V128*) = h_pcmpistri_08;
UInt(*s)(V128*,V128*) = s_pcmpistri_08;
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_1A //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x1A: executes a real PCMPISTRI $0x1A
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX. */
UInt h_pcmpistri_1A ( V128* argL, V128* argR )
{
// Copy both operands into one contiguous block so a single base
// register (%2) reaches them: argL at offset 0, argR at offset 16.
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
// argL's copy is loaded into xmm2 and argR's copy into xmm11.  After
// the instruction, the flags are captured via pushfq/popq into rdx.
// The subq/addq pair lowers rsp by 1024 bytes around the sequence --
// presumably so that pushfq cannot overwrite compiler-owned stack
// data just below rsp (NOTE(review): confirm intent).
__asm__ __volatile__(
"subq $1024, %%rsp" "\n\t"
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t"
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
// Same packing as the s_pcmpistri_* functions: flags above, index below.
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x1A.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x1A, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_1A ( void )
{
char* wot = "1A";
UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_02 //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x02: executes a real PCMPISTRI $0x02
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX. */
UInt h_pcmpistri_02 ( V128* argL, V128* argR )
{
// Copy both operands into one contiguous block so a single base
// register (%2) reaches them: argL at offset 0, argR at offset 16.
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
// argL's copy is loaded into xmm2 and argR's copy into xmm11.  After
// the instruction, the flags are captured via pushfq/popq into rdx.
// The subq/addq pair lowers rsp by 1024 bytes around the sequence --
// presumably so that pushfq cannot overwrite compiler-owned stack
// data just below rsp (NOTE(review): confirm intent).
__asm__ __volatile__(
"subq $1024, %%rsp" "\n\t"
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t"
// Disabled alternative: use the mask-producing form and read xmm0.
//"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
// Same packing as the s_pcmpistri_* functions: flags above, index below.
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x02.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x02, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_02 ( void )
{
char* wot = "02";
UInt(*h)(V128*,V128*) = h_pcmpistri_02;
UInt(*s)(V128*,V128*) = s_pcmpistri_02;
try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_12 //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x12: executes a real PCMPISTRI $0x12
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX. */
UInt h_pcmpistri_12 ( V128* argL, V128* argR )
{
// Copy both operands into one contiguous block so a single base
// register (%2) reaches them: argL at offset 0, argR at offset 16.
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
// argL's copy is loaded into xmm2 and argR's copy into xmm11.  After
// the instruction, the flags are captured via pushfq/popq into rdx.
// The subq/addq pair lowers rsp by 1024 bytes around the sequence --
// presumably so that pushfq cannot overwrite compiler-owned stack
// data just below rsp (NOTE(review): confirm intent).
__asm__ __volatile__(
"subq $1024, %%rsp" "\n\t"
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t"
// Disabled alternative: use the mask-producing form and read xmm0.
//"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
// Same packing as the s_pcmpistri_* functions: flags above, index below.
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x12.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x12, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_12 ( void )
{
char* wot = "12";
UInt(*h)(V128*,V128*) = h_pcmpistri_12;
UInt(*s)(V128*,V128*) = s_pcmpistri_12;
try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_44 //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x44: executes a real PCMPISTRI $0x44
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX. */
UInt h_pcmpistri_44 ( V128* argL, V128* argR )
{
// Copy both operands into one contiguous block so a single base
// register (%2) reaches them: argL at offset 0, argR at offset 16.
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
// argL's copy is loaded into xmm2 and argR's copy into xmm11.  After
// the instruction, the flags are captured via pushfq/popq into rdx.
// The subq/addq pair lowers rsp by 1024 bytes around the sequence --
// presumably so that pushfq cannot overwrite compiler-owned stack
// data just below rsp (NOTE(review): confirm intent).
__asm__ __volatile__(
"subq $1024, %%rsp" "\n\t"
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t"
// Disabled alternative: use the mask-producing form and read xmm0.
// Note the disabled line carries immediate 0x04, not 0x44.
//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
// Same packing as the s_pcmpistri_* functions: flags above, index below.
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x44.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x44, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_44 ( void )
{
char* wot = "44";
UInt(*h)(V128*,V128*) = h_pcmpistri_44;
UInt(*s)(V128*,V128*) = s_pcmpistri_44;
try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_00 //
// //
//////////////////////////////////////////////////////////
/* Hardware reference for imm8 = 0x00: executes a real PCMPISTRI $0x00
   on copies of *argL and *argR.  The return value holds the captured
   flags word masked with 0x8D5 (the O, S, Z, A, P and C bits) shifted
   left by 16, OR-ed with the low 16 bits of RCX. */
UInt h_pcmpistri_00 ( V128* argL, V128* argR )
{
// Copy both operands into one contiguous block so a single base
// register (%2) reaches them: argL at offset 0, argR at offset 16.
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
// argL's copy is loaded into xmm2 and argR's copy into xmm11.  After
// the instruction, the flags are captured via pushfq/popq into rdx.
// The subq/addq pair lowers rsp by 1024 bytes around the sequence --
// presumably so that pushfq cannot overwrite compiler-owned stack
// data just below rsp (NOTE(review): confirm intent).
__asm__ __volatile__(
"subq $1024, %%rsp" "\n\t"
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t"
// Disabled alternative: use the mask-producing form and read xmm0.
//"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
// Same packing as the s_pcmpistri_* functions: flags above, index below.
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software model for imm8 = 0x00.  Derives the zero masks from the
   two vectors, runs pcmpXstrX_WRK in xSTRI mode, and packs the result
   as (OSZACP flags << 16) | ECX.  Asserts that the worker accepted
   the imm8 value. */
UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt flags;
   const UInt maskL = zmask_from_V128(argLU);
   const UInt maskR = zmask_from_V128(argRU);
   const Bool handled
      = pcmpXstrX_WRK( &out, &flags, argLU, argRU,
                       maskL, maskR, 0x00, False/*!isSTRM*/ );
   assert(handled);
   return (flags << 16) | out.uInt[0];
}
void istri_00 ( void )
{
char* wot = "00";
UInt(*h)(V128*,V128*) = h_pcmpistri_00;
UInt(*s)(V128*,V128*) = s_pcmpistri_00;
try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
}
//////////////////////////////////////////////////////////
// //
// ISTRI_38 //
// //
//////////////////////////////////////////////////////////
/* Hardware side of the ISTRI_38 test: executes a real
   "pcmpistri $0x38" on copies of *argL and *argR.
   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF), i.e. the selected
   flag bits in the upper half and the low 16 bits of %rcx in the lower
   half.  0x8D5 equals MASK_O|MASK_S|MASK_Z|MASK_A|MASK_P|MASK_C. */
UInt h_pcmpistri_38 ( V128* argL, V128* argR )
{
/* Lay both operands out contiguously so the asm needs a single base
   pointer: argL at byte offset 0, argR at byte offset 16. */
V128 block[2];
memcpy(&block[0], argL, sizeof(V128));
memcpy(&block[1], argR, sizeof(V128));
ULong res, flags;
__asm__ __volatile__(
/* NOTE(review): presumably lowers %rsp so that the pushfq below cannot
   overwrite stack data the compiler keeps below %rsp -- confirm. */
"subq $1024, %%rsp" "\n\t"
/* xmm2 <- argL copy, xmm11 <- argR copy (unaligned loads). */
"movdqu 0(%2), %%xmm2" "\n\t"
"movdqu 16(%2), %%xmm11" "\n\t"
"pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t"
/* Copy RFLAGS into %rdx via the stack, before anything else can
   change the flags. */
"pushfq" "\n\t"
"popq %%rdx" "\n\t"
/* Hand %rcx and the saved flags to the C output operands. */
"movq %%rcx, %0" "\n\t"
"movq %%rdx, %1" "\n\t"
/* Undo the stack adjustment made at the top. */
"addq $1024, %%rsp" "\n\t"
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
);
/* Keep only the O,S,Z,A,P,C flag bits and the low 16 bits of %rcx. */
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
/* Software side of the ISTRI_38 test: obtains the result for imm8 0x38
   from pcmpXstrX_WRK rather than from the hardware instruction, and
   packs it in the same layout as h_pcmpistri_38:
   (OSZACP flags << 16) | first 32-bit lane of the result vector.
   Asserts that pcmpXstrX_WRK reports success. */
UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
{
   V128 result;
   UInt flagsOSZACP;
   /* Zero-position masks for the left and right operands. */
   UInt zmaskL = zmask_from_V128(argLU);
   UInt zmaskR = zmask_from_V128(argRU);

   Bool succeeded = pcmpXstrX_WRK( &result, &flagsOSZACP,
                                   argLU, argRU,
                                   zmaskL, zmaskR,
                                   0x38, False/*!isSTRM*/ );
   assert(succeeded);

   return (flagsOSZACP << 16) | result.uInt[0];
}
void istri_38 ( void )
{
char* wot = "38";
UInt(*h)(V128*,V128*) = h_pcmpistri_38;
UInt(*s)(V128*,V128*) = s_pcmpistri_38;
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}
//////////////////////////////////////////////////////////
// //
// main //
// //
//////////////////////////////////////////////////////////
int main ( void )
{
istri_4A();
istri_3A();
istri_08();
istri_1A();
istri_02();
istri_0C();
istri_12();
istri_44();
istri_00();
istri_38();
return 0;
}