#include <stdio.h> #include <stdlib.h> #include <assert.h> typedef unsigned char UChar; typedef unsigned int UInt; static UInt randomUInt ( void ) { static UInt n = 0; /* From "Numerical Recipes in C" 2nd Edition */ n = 1664525UL * n + 1013904223UL; return n >> 17; } void maskmovq_mmx ( UChar* regL, UChar* regR ) { int i; UChar* dst = malloc(8); assert(dst); for (i = 0; i < 8; i++) dst[i] = 17 * (i+1); __asm__ __volatile__( "emms\n\t" "movq (%0), %%mm1\n\t" "movq (%1), %%mm2\n\t" "movq %2, %%rdi\n\t" "maskmovq %%mm1,%%mm2" : /*out*/ : /*in*/ "r"(regL), "r"(regR), "r"(&dst[0]) : /*trash*/ "rdi", "memory", "cc" ); for (i = 0; i < 8; i++) printf("%02x", dst[i]); free(dst); } void maskmovdqu_sse ( UChar* regL, UChar* regR ) { int i; UChar* dst = malloc(16); assert(dst); for (i = 0; i < 16; i++) dst[i] = i; __asm__ __volatile__( "movups (%0), %%xmm1\n\t" "movups (%1), %%xmm12\n\t" "movq %2, %%rdi\n\t" "maskmovdqu %%xmm12,%%xmm1\n\t" "sfence" : /*out*/ : /*in*/ "r"(regL), "r"(regR), "r"(dst) : /*trash*/ "rdi", "memory", "cc" ); for (i = 0; i < 16; i++) printf("%02x", dst[i]); free(dst); } int main ( int argc, char** argv ) { int i, j; /* mmx test */ { UChar* regL = malloc(8); UChar* regR = malloc(8); assert(regL); assert(regR); for (i = 0; i < 10; i++) { for (j = 0; j < 8; j++) { regL[j] = (UChar)randomUInt(); printf("%02x", regL[j]); } printf(" "); for (j = 0; j < 8; j++) { regR[j] = (UChar)randomUInt(); printf("%02x", regR[j]); } printf(" "); maskmovq_mmx( regR, regL ); printf("\n"); } } /* sse test */ { UChar* regL = malloc(16); UChar* regR = malloc(16); assert(regL); assert(regR); for (i = 0; i < 10; i++) { for (j = 0; j < 16; j++) { regL[j] = (UChar)randomUInt(); printf("%02x", regL[j]); } printf(" "); for (j = 0; j < 16; j++) { regR[j] = (UChar)randomUInt(); printf("%02x", regR[j]); } printf(" "); maskmovdqu_sse( regR, regL ); printf("\n"); } } return 0; }