/* HOW TO USE

   13 Dec '05 - Linker no longer used (apart from mymalloc).

   Simply compile and link switchback.c with test_xxx.c, e.g. for ppc64:

   $ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) \
     && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g \
            -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c

   The test file test_xxx.c must have an entry point called "entry",
   which expects to take a single argument, which is a function pointer
   (to "serviceFn").

   The test file may not reference any other symbols.

   NOTE: POWERPC: it is critical, when using this on ppc, to set
   CacheLineSize to the right value.  Values we currently know of:

      iMac (G3):   32
      G5 (ppc970): 128
*/
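/* For illustration only (hypothetical, not part of this file or the
   original sources): a minimal test_xxx.c obeying the contract above.
   It defines "entry", takes only the serviceFn pointer, and references
   no other symbols.  Service request numbers match serviceFn below
   (1 = PUTC, 0 = EXIT).

      typedef unsigned long HWord;

      void entry ( HWord (*serviceFn)(HWord,HWord) )
      {
         const char* msg = "hello from the guest\n";
         int i;
         for (i = 0; msg[i]; i++)
            serviceFn(1, (HWord)msg[i]);   // PUTC
         serviceFn(0, 0);                  // EXIT (does not return)
      }
*/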
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#include "../pub/libvex_basictypes.h"
#include "../pub/libvex_guest_x86.h"
#include "../pub/libvex_guest_amd64.h"
#include "../pub/libvex_guest_ppc32.h"
#include "../pub/libvex_guest_ppc64.h"
#include "../pub/libvex.h"
#include "../pub/libvex_trc_values.h"
#include "linker.h"

static ULong n_bbs_done = 0;
static Int   n_translations_made = 0;


#if defined(__i386__)
#  define VexGuestState             VexGuestX86State
#  define LibVEX_Guest_initialise   LibVEX_GuestX86_initialise
#  define VexArch                   VexArchX86
#  define VexSubArch                VexSubArchX86_sse1
#  define GuestPC                   guest_EIP
#  define CacheLineSize             0 /*irrelevant*/

#elif defined(__x86_64__)
#  define VexGuestState             VexGuestAMD64State
#  define LibVEX_Guest_initialise   LibVEX_GuestAMD64_initialise
#  define VexArch                   VexArchAMD64
#  define VexSubArch                VexSubArch_NONE
#  define GuestPC                   guest_RIP
#  define CacheLineSize             0 /*irrelevant*/

#elif defined(__powerpc__)
#  if !defined(__powerpc64__)   // ppc32
#    define VexGuestState           VexGuestPPC32State
#    define LibVEX_Guest_initialise LibVEX_GuestPPC32_initialise
#    define VexArch                 VexArchPPC32
#    define VexSubArch              VexSubArchPPC32_FI
#    define GuestPC                 guest_CIA
#    define CacheLineSize           128
#  else                         // ppc64
#    define VexGuestState           VexGuestPPC64State
#    define LibVEX_Guest_initialise LibVEX_GuestPPC64_initialise
#    define VexArch                 VexArchPPC64
#    define VexSubArch              VexSubArchPPC64_FI
#    define GuestPC                 guest_CIA
#    define CacheLineSize           128
#  endif

#else
#  error "Unknown arch"
#endif

/* 7: show conversion into IR */
/* 6: show after initial opt */
/* 5: show after instrumentation */
/* 4: show after second opt */
/* 3: show after tree building */
/* 2: show selected insns */
/* 1: show after reg-alloc */
/* 0: show final assembly */
#define TEST_FLAGS        (1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0)
#define DEBUG_TRACE_FLAGS 0 //(1<<7)|(0<<6)|(0<<5)|(0<<4)|(1<<3)|(1<<2)|(1<<1)|(1<<0)


/* guest state */
UInt gstack[50000];
VexGuestState gst;
VexControl vcon;

/* only used for the switchback transition */
/* i386:  helper1 = &gst, helper2 = %EFLAGS */
/* amd64: helper1 = &gst, helper2 = %EFLAGS */
/* ppc32: helper1 = &gst, helper2 = %CR, helper3 = %XER */
HWord sb_helper1 = 0;
HWord sb_helper2 = 0;
HWord sb_helper3 = 0;

/* translation cache */
#define N_TRANS_CACHE 1000000
#define N_TRANS_TABLE 10000

ULong           trans_cache [N_TRANS_CACHE];
VexGuestExtents trans_table [N_TRANS_TABLE];
ULong*          trans_tableP[N_TRANS_TABLE];

Int trans_cache_used = 0;
Int trans_table_used = 0;

static Bool chase_into_ok ( Addr64 dst ) {
   return False;
}


#if 0
// local_sys_write_stderr(&c,1);
static void local_sys_write_stderr ( HChar* buf, Int n )
{
   UInt __res;
   __asm__ volatile (
      "li %%r0,4\n\t"      /* set %r0 = __NR_write */
      "li %%r3,1\n\t"      /* set %r3 = stdout */
      "mr %%r4,%1\n\t"     /* set %r4 = buf */
      "mr %%r5,%2\n\t"     /* set %r5 = n */
      "sc\n\t"             /* write(stderr, buf, n) */
      "mr %0,%%r3\n"       /* set __res = r3 */
      : "=mr" (__res)
      : "g" (buf), "g" (n)
      : "r0", "r3", "r4", "r5" );
}
#endif


/* For providing services. */
static HWord serviceFn ( HWord arg1, HWord arg2 )
{
   switch (arg1) {
      case 0: /* EXIT */
         printf("---STOP---\n");
         printf("serviceFn:EXIT\n");
         printf("%llu bbs simulated\n", n_bbs_done);
         printf("%d translations made, %d tt bytes\n",
                n_translations_made, 8*trans_cache_used);
         exit(0);
      case 1: /* PUTC */
         putchar(arg2);
         return 0;
      case 2: /* MALLOC */
         return (HWord)malloc(arg2);
      case 3: /* FREE */
         free((void*)arg2);
         return 0;
      default:
         assert(0);
   }
}
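/* Illustration only (hypothetical, not from the original sources): since a
   test file may reference no symbols other than serviceFn, guest code gets
   heap storage through service requests 2 (MALLOC) and 3 (FREE), e.g.

      static HWord (*sf)(HWord,HWord);   // serviceFn pointer stashed by entry()

      static void* guest_malloc ( HWord nbytes ) { return (void*)sf(2, nbytes); }
      static void  guest_free   ( void* p )      { sf(3, (HWord)p); }
*/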
/* -------------------- */
/* continue execution on the real CPU (never returns) */

extern void switchback_asm(void);

#if defined(__i386__)

asm(
"switchback_asm:\n"
"   movl sb_helper1, %eax\n"  // eax = guest state ptr
"   movl 16(%eax), %esp\n"    // switch stacks
"   pushl 56(%eax)\n"         // push continuation addr
"   movl sb_helper2, %ebx\n"  // get eflags
"   pushl %ebx\n"             // eflags:CA
"   pushl 0(%eax)\n"          // EAX:eflags:CA
"   movl 4(%eax), %ecx\n"
"   movl 8(%eax), %edx\n"
"   movl 12(%eax), %ebx\n"
"   movl 20(%eax), %ebp\n"
"   movl 24(%eax), %esi\n"
"   movl 28(%eax), %edi\n"
"   popl %eax\n"
"   popfl\n"
"   ret\n"
);

void switchback ( void )
{
   sb_helper1 = (HWord)&gst;
   sb_helper2 = LibVEX_GuestX86_get_eflags(&gst);
   switchback_asm(); // never returns
}

#elif defined(__x86_64__)

asm(
"switchback_asm:\n"
"   movq sb_helper1, %rax\n"  // rax = guest state ptr
"   movq 32(%rax), %rsp\n"    // switch stacks
"   pushq 168(%rax)\n"        // push continuation addr
"   movq sb_helper2, %rbx\n"  // get eflags
"   pushq %rbx\n"             // eflags:CA
"   pushq 0(%rax)\n"          // RAX:eflags:CA
"   movq 8(%rax), %rcx\n"
"   movq 16(%rax), %rdx\n"
"   movq 24(%rax), %rbx\n"
"   movq 40(%rax), %rbp\n"
"   movq 48(%rax), %rsi\n"
"   movq 56(%rax), %rdi\n"
"   movq 64(%rax), %r8\n"
"   movq 72(%rax), %r9\n"
"   movq 80(%rax), %r10\n"
"   movq 88(%rax), %r11\n"
"   movq 96(%rax), %r12\n"
"   movq 104(%rax), %r13\n"
"   movq 112(%rax), %r14\n"
"   movq 120(%rax), %r15\n"
"   popq %rax\n"
"   popfq\n"
"   ret\n"
);

void switchback ( void )
{
   sb_helper1 = (HWord)&gst;
   sb_helper2 = LibVEX_GuestAMD64_get_rflags(&gst);
   switchback_asm(); // never returns
}

#elif defined(__powerpc__)

static void invalidate_icache(void *ptr, int nbytes)
{
   unsigned long startaddr = (unsigned long) ptr;
   unsigned long endaddr   = startaddr + nbytes;
   unsigned long addr;
   unsigned long cls       = CacheLineSize;

   startaddr &= ~(cls - 1);
   for (addr = startaddr; addr < endaddr; addr += cls)
      asm volatile("dcbst 0,%0" : : "r" (addr));
   asm volatile("sync");
   for (addr = startaddr; addr < endaddr; addr += cls)
      asm volatile("icbi 0,%0" : : "r" (addr));
   asm volatile("sync; isync");
}

#if !defined(__powerpc64__)   // ppc32
asm(
"switchback_asm:\n"
// gst
"   lis  %r31,sb_helper1@ha\n"       // get hi-wd of guest_state_ptr addr
"   lwz  %r31,sb_helper1@l(%r31)\n"  // load word of guest_state_ptr to r31

// LR
"   lwz  %r3,900(%r31)\n"            // guest_LR
"   mtlr %r3\n"                      // move to LR

// CR
"   lis  %r3,sb_helper2@ha\n"        // get hi-wd of flags addr
"   lwz  %r3,sb_helper2@l(%r3)\n"    // load flags word to r3
"   mtcr %r3\n"                      // move r3 to CR

// CTR
"   lwz   %r3,904(%r31)\n"           // guest_CTR
"   mtctr %r3\n"                     // move r3 to CTR

// XER
"   lis   %r3,sb_helper3@ha\n"       // get hi-wd of xer addr
"   lwz   %r3,sb_helper3@l(%r3)\n"   // load xer word to r3
"   mtxer %r3\n"                     // move r3 to XER

// GPRs
"   lwz %r0,    0(%r31)\n"
"   lwz %r1,    4(%r31)\n"           // switch stacks (r1 = SP)
"   lwz %r2,    8(%r31)\n"
"   lwz %r3,   12(%r31)\n"
"   lwz %r4,   16(%r31)\n"
"   lwz %r5,   20(%r31)\n"
"   lwz %r6,   24(%r31)\n"
"   lwz %r7,   28(%r31)\n"
"   lwz %r8,   32(%r31)\n"
"   lwz %r9,   36(%r31)\n"
"   lwz %r10,  40(%r31)\n"
"   lwz %r11,  44(%r31)\n"
"   lwz %r12,  48(%r31)\n"
"   lwz %r13,  52(%r31)\n"
"   lwz %r14,  56(%r31)\n"
"   lwz %r15,  60(%r31)\n"
"   lwz %r16,  64(%r31)\n"
"   lwz %r17,  68(%r31)\n"
"   lwz %r18,  72(%r31)\n"
"   lwz %r19,  76(%r31)\n"
"   lwz %r20,  80(%r31)\n"
"   lwz %r21,  84(%r31)\n"
"   lwz %r22,  88(%r31)\n"
"   lwz %r23,  92(%r31)\n"
"   lwz %r24,  96(%r31)\n"
"   lwz %r25, 100(%r31)\n"
"   lwz %r26, 104(%r31)\n"
"   lwz %r27, 108(%r31)\n"
"   lwz %r28, 112(%r31)\n"
"   lwz %r29, 116(%r31)\n"
"   lwz %r30, 120(%r31)\n"
"   lwz %r31, 124(%r31)\n"

// The nops below are patched at run time with a branch to the
// continuation address (see switchback() below).
"nop_start_point:\n"
"   nop\n"
"   nop\n"
"   nop\n"
"   nop\n"
"   nop\n"
"nop_end_point:\n"
);

#else   // ppc64
asm(
".text\n"
"   .global switchback_asm\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"switchback_asm:\n"
"   .quad .switchback_asm,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .switchback_asm,@function\n"
"   .global .switchback_asm\n"
".switchback_asm:\n"
"switchback_asm_undotted:\n"
// gst: load word of guest_state_ptr to r31
"   lis    %r31,sb_helper1@highest\n"
"   ori    %r31,%r31,sb_helper1@higher\n"
"   rldicr %r31,%r31,32,31\n"
"   oris   %r31,%r31,sb_helper1@h\n"
"   ori    %r31,%r31,sb_helper1@l\n"
"   ld     %r31,0(%r31)\n"

// LR
"   ld   %r3,1032(%r31)\n"   // guest_LR
"   mtlr %r3\n"              // move to LR

// CR
"   lis    %r3,sb_helper2@highest\n"
"   ori    %r3,%r3,sb_helper2@higher\n"
"   rldicr %r3,%r3,32,31\n"
"   oris   %r3,%r3,sb_helper2@h\n"
"   ori    %r3,%r3,sb_helper2@l\n"
"   ld     %r3,0(%r3)\n"     // load flags word to r3
"   mtcr   %r3\n"            // move r3 to CR

// CTR
"   ld    %r3,1040(%r31)\n"  // guest_CTR
"   mtctr %r3\n"             // move r3 to CTR

// XER
"   lis    %r3,sb_helper3@highest\n"
"   ori    %r3,%r3,sb_helper3@higher\n"
"   rldicr %r3,%r3,32,31\n"
"   oris   %r3,%r3,sb_helper3@h\n"
"   ori    %r3,%r3,sb_helper3@l\n"
"   ld     %r3,0(%r3)\n"     // load xer word to r3
"   mtxer  %r3\n"            // move r3 to XER

// GPRs
"   ld %r0,    0(%r31)\n"
"   ld %r1,    8(%r31)\n"    // switch stacks (r1 = SP)
"   ld %r2,   16(%r31)\n"
"   ld %r3,   24(%r31)\n"
"   ld %r4,   32(%r31)\n"
"   ld %r5,   40(%r31)\n"
"   ld %r6,   48(%r31)\n"
"   ld %r7,   56(%r31)\n"
"   ld %r8,   64(%r31)\n"
"   ld %r9,   72(%r31)\n"
"   ld %r10,  80(%r31)\n"
"   ld %r11,  88(%r31)\n"
"   ld %r12,  96(%r31)\n"
"   ld %r13, 104(%r31)\n"
"   ld %r14, 112(%r31)\n"
"   ld %r15, 120(%r31)\n"
"   ld %r16, 128(%r31)\n"
"   ld %r17, 136(%r31)\n"
"   ld %r18, 144(%r31)\n"
"   ld %r19, 152(%r31)\n"
"   ld %r20, 160(%r31)\n"
"   ld %r21, 168(%r31)\n"
"   ld %r22, 176(%r31)\n"
"   ld %r23, 184(%r31)\n"
"   ld %r24, 192(%r31)\n"
"   ld %r25, 200(%r31)\n"
"   ld %r26, 208(%r31)\n"
"   ld %r27, 216(%r31)\n"
"   ld %r28, 224(%r31)\n"
"   ld %r29, 232(%r31)\n"
"   ld %r30, 240(%r31)\n"
"   ld %r31, 248(%r31)\n"

// The nops below are patched at run time with a branch to the
// continuation address (see switchback() below).
"nop_start_point:\n"
"   nop\n"
"   nop\n"
"   nop\n"
"   nop\n"
"   nop\n"
"nop_end_point:\n"
);
#endif

extern void switchback_asm_undotted;
extern void nop_start_point;
extern void nop_end_point;
void switchback ( void )
{
   Int i;

   /* blargh.  Copy the entire switchback_asm procedure into new
      memory on which we can set both write and execute permissions,
      so we can poke around with it and then run the results. */
#if defined(__powerpc64__)   // ppc64
   UChar* sa_start     = (UChar*)&switchback_asm_undotted;
#else
   UChar* sa_start     = (UChar*)&switchback_asm;
#endif
   UChar* sa_nop_start = (UChar*)&nop_start_point;
   UChar* sa_end       = (UChar*)&nop_end_point;
#if 0
   printf("sa_start     %p\n", sa_start );
   printf("sa_nop_start %p\n", sa_nop_start);
   printf("sa_end       %p\n", sa_end);
#endif

   Int nbytes       = sa_end - sa_start;
   Int off_nopstart = sa_nop_start - sa_start;
   if (0)
      printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart);

   /* copy it into mallocville */
   UChar* copy = mymalloc(nbytes);
   assert(copy);
   for (i = 0; i < nbytes; i++)
      copy[i] = sa_start[i];

   UInt* p = (UInt*)(&copy[off_nopstart]);

#if !defined(__powerpc64__)   // ppc32
   Addr32 addr_of_nop = (Addr32)p;
   Addr32 where_to_go = gst.guest_CIA;
   Int    diff = ((Int)where_to_go) - ((Int)addr_of_nop);
#if 0
   printf("addr of first nop = 0x%x\n", addr_of_nop);
   printf("where to go       = 0x%x\n", where_to_go);
   printf("diff              = 0x%x\n", diff);
#endif
#else   // ppc64
   Addr64 addr_of_nop = (Addr64)p;
   Addr64 where_to_go = gst.guest_CIA;
   Long   diff = ((Long)where_to_go) - ((Long)addr_of_nop);
#if 0
   printf("addr of first nop = 0x%llx\n", addr_of_nop);
   printf("where to go       = 0x%llx\n", where_to_go);
   printf("diff              = 0x%llx\n", diff);
#endif
#endif

   if (diff < -0x2000000 || diff >= 0x2000000) {
      // we're hosed.  Give up
      printf("hosed -- offset too large\n");
      assert(0);
   }

   sb_helper1 = (HWord)&gst;
#if !defined(__powerpc64__)   // ppc32
   sb_helper2 = LibVEX_GuestPPC32_get_CR(&gst);
   sb_helper3 = LibVEX_GuestPPC32_get_XER(&gst);
#else   // ppc64
   sb_helper2 = LibVEX_GuestPPC64_get_CR(&gst);
   sb_helper3 = LibVEX_GuestPPC64_get_XER(&gst);
#endif

   /* stay sane ... */
   assert(p[0] == 24<<26);   /* nop */

   /* branch to diff: overwrite the first nop with an I-form branch --
      opcode 18 in the top 6 bits, the word offset in bits 25..2,
      AA=0 (relative), LK=0 (no link). */
   p[0] = ((18<<26) | (((diff >> 2) & 0xFFFFFF) << 2) | (0<<1) | (0<<0));

   invalidate_icache( copy, nbytes );

#if defined(__powerpc64__)
   //printf("jumping to %p\n", copy);
   {
      ULong faketoc[3];
      void* v;
      faketoc[0] = (ULong)copy;
      v = &faketoc[0];
      ( (void(*)(void)) v )();
   }
#else
   ( (void(*)(void))copy )();
#endif
}

#else
#  error "Unknown arch (switchback)"
#endif
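/* mymalloc() comes from the linker code (the only part of it still used,
   per the header comment).  Whatever it does internally, switchback() above
   needs it to return memory that is both writable and executable.  A
   minimal sketch of such an allocator (an illustrative assumption, not the
   actual linker implementation) could be:

      #include <sys/mman.h>

      void* mymalloc ( int nbytes )
      {
         void* p = mmap(NULL, nbytes, PROT_READ|PROT_WRITE|PROT_EXEC,
                        MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
         return p == MAP_FAILED ? NULL : p;
      }
*/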
/* -------------------- */

static HWord f, gp, res;

extern void run_translation_asm(void);

#if defined(__i386__)
asm(
"run_translation_asm:\n"
"   pushal\n"
"   movl gp, %ebp\n"
"   movl f, %eax\n"
"   call *%eax\n"
"   movl %eax, res\n"
"   popal\n"
"   ret\n"
);

#elif defined(__x86_64__)
asm(
"run_translation_asm:\n"
"   pushq %rax\n"
"   pushq %rbx\n"
"   pushq %rcx\n"
"   pushq %rdx\n"
"   pushq %rbp\n"
"   pushq %rsi\n"
"   pushq %rdi\n"
"   pushq %r8\n"
"   pushq %r9\n"
"   pushq %r10\n"
"   pushq %r11\n"
"   pushq %r12\n"
"   pushq %r13\n"
"   pushq %r14\n"
"   pushq %r15\n"
"   movq gp, %rbp\n"
"   movq f, %rax\n"
"   call *%rax\n"
"   movq %rax, res\n"
"   popq %r15\n"
"   popq %r14\n"
"   popq %r13\n"
"   popq %r12\n"
"   popq %r11\n"
"   popq %r10\n"
"   popq %r9\n"
"   popq %r8\n"
"   popq %rdi\n"
"   popq %rsi\n"
"   popq %rbp\n"
"   popq %rdx\n"
"   popq %rcx\n"
"   popq %rbx\n"
"   popq %rax\n"
"   ret\n"
);

#elif defined(__powerpc__)

#if !defined(__powerpc64__)   // ppc32
asm(
"run_translation_asm:\n"

// create new stack:
// save old sp at first word & update sp
"   stwu 1,-256(1)\n"

// save LR
"   mflr %r0\n"
"   stw  %r0,260(%r1)\n"

// leave hole @ 4(%r1) for a callee to save its LR
// no params
// no need to save non-volatile CR fields

// store registers to stack: just the callee-saved regs
"   stw %r13,  8(%r1)\n"
"   stw %r14, 12(%r1)\n"
"   stw %r15, 16(%r1)\n"
"   stw %r16, 20(%r1)\n"
"   stw %r17, 24(%r1)\n"
"   stw %r18, 28(%r1)\n"
"   stw %r19, 32(%r1)\n"
"   stw %r20, 36(%r1)\n"
"   stw %r21, 40(%r1)\n"
"   stw %r22, 44(%r1)\n"
"   stw %r23, 48(%r1)\n"
"   stw %r24, 52(%r1)\n"
"   stw %r25, 56(%r1)\n"
"   stw %r26, 60(%r1)\n"
"   stw %r27, 64(%r1)\n"
"   stw %r28, 68(%r1)\n"
"   stw %r29, 72(%r1)\n"
"   stw %r30, 76(%r1)\n"
"   stw %r31, 80(%r1)\n"

// r31 (guest state ptr) := global var "gp"
"   lis %r31,gp@ha\n"
"   lwz %r31,gp@l(%r31)\n"

// call translation address in global var "f"
"   lis   %r4,f@ha\n"
"   lwz   %r4,f@l(%r4)\n"
"   mtctr %r4\n"
"   bctrl\n"

// save return value (in r3) into global var "res"
"   lis %r5,res@ha\n"
"   stw %r3,res@l(%r5)\n"

// save possibly modified guest state ptr (r31) in "gp"
"   lis %r5,gp@ha\n"
"   stw %r31,gp@l(%r5)\n"

// reload registers from stack
"   lwz %r13,  8(%r1)\n"
"   lwz %r14, 12(%r1)\n"
"   lwz %r15, 16(%r1)\n"
"   lwz %r16, 20(%r1)\n"
"   lwz %r17, 24(%r1)\n"
"   lwz %r18, 28(%r1)\n"
"   lwz %r19, 32(%r1)\n"
"   lwz %r20, 36(%r1)\n"
"   lwz %r21, 40(%r1)\n"
"   lwz %r22, 44(%r1)\n"
"   lwz %r23, 48(%r1)\n"
"   lwz %r24, 52(%r1)\n"
"   lwz %r25, 56(%r1)\n"
"   lwz %r26, 60(%r1)\n"
"   lwz %r27, 64(%r1)\n"
"   lwz %r28, 68(%r1)\n"
"   lwz %r29, 72(%r1)\n"
"   lwz %r30, 76(%r1)\n"
"   lwz %r31, 80(%r1)\n"

// restore LR
"   lwz  %r0,260(%r1)\n"
"   mtlr %r0\n"

// restore previous stack pointer
"   addi %r1,%r1,256\n"

// return
"   blr"
);

#else   // ppc64
asm(
".text\n"
"   .global run_translation_asm\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"run_translation_asm:\n"
"   .quad .run_translation_asm,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .run_translation_asm,@function\n"
"   .global .run_translation_asm\n"
".run_translation_asm:\n"

// save LR,CTR
"   mflr  %r0\n"
"   std   %r0,16(%r1)\n"
"   mfctr %r0\n"
"   std   %r0,8(%r1)\n"

// create new stack:
// save old sp at first word & update sp
"   stdu 1,-256(1)\n"

// leave hole @ 4(%r1) for a callee to save its LR
// no params
// no need to save non-volatile CR fields

// store registers to stack: just the callee-saved regs
"   std %r13,  48(%r1)\n"
"   std %r14,  56(%r1)\n"
"   std %r15,  64(%r1)\n"
"   std %r16,  72(%r1)\n"
"   std %r17,  80(%r1)\n"
"   std %r18,  88(%r1)\n"
"   std %r19,  96(%r1)\n"
"   std %r20, 104(%r1)\n"
"   std %r21, 112(%r1)\n"
"   std %r22, 120(%r1)\n"
"   std %r23, 128(%r1)\n"
"   std %r24, 136(%r1)\n"
"   std %r25, 144(%r1)\n"
"   std %r26, 152(%r1)\n"
"   std %r27, 160(%r1)\n"
"   std %r28, 168(%r1)\n"
"   std %r29, 176(%r1)\n"
"   std %r30, 184(%r1)\n"
"   std %r31, 192(%r1)\n"

// r31 (guest state ptr) := global var "gp"
"   lis    %r31,gp@highest\n"
"   ori    %r31,%r31,gp@higher\n"
"   rldicr %r31,%r31,32,31\n"
"   oris   %r31,%r31,gp@h\n"
"   ori    %r31,%r31,gp@l\n"
"   ld     %r31,0(%r31)\n"

// call translation address in global var "f"
"   lis    %r4,f@highest\n"
"   ori    %r4,%r4,f@higher\n"
"   rldicr %r4,%r4,32,31\n"
"   oris   %r4,%r4,f@h\n"
"   ori    %r4,%r4,f@l\n"
"   ld     %r4,0(%r4)\n"
"   mtctr  %r4\n"
"   bctrl\n"

// save return value (in r3) into global var "res"
"   lis    %r5,res@highest\n"
"   ori    %r5,%r5,res@higher\n"
"   rldicr %r5,%r5,32,31\n"
"   oris   %r5,%r5,res@h\n"
"   ori    %r5,%r5,res@l\n"
"   std    %r3,0(%r5)\n"

// save possibly modified guest state ptr (r31) in "gp"
"   lis    %r5,gp@highest\n"
"   ori    %r5,%r5,gp@higher\n"
"   rldicr %r5,%r5,32,31\n"
"   oris   %r5,%r5,gp@h\n"
"   ori    %r5,%r5,gp@l\n"
"   std    %r31,0(%r5)\n"

// reload registers from stack
"   ld %r13,  48(%r1)\n"
"   ld %r14,  56(%r1)\n"
"   ld %r15,  64(%r1)\n"
"   ld %r16,  72(%r1)\n"
"   ld %r17,  80(%r1)\n"
"   ld %r18,  88(%r1)\n"
"   ld %r19,  96(%r1)\n"
"   ld %r20, 104(%r1)\n"
"   ld %r21, 112(%r1)\n"
"   ld %r22, 120(%r1)\n"
"   ld %r23, 128(%r1)\n"
"   ld %r24, 136(%r1)\n"
"   ld %r25, 144(%r1)\n"
"   ld %r26, 152(%r1)\n"
"   ld %r27, 160(%r1)\n"
"   ld %r28, 168(%r1)\n"
"   ld %r29, 176(%r1)\n"
"   ld %r30, 184(%r1)\n"
"   ld %r31, 192(%r1)\n"

// restore previous stack pointer
"   addi %r1,%r1,256\n"

// restore LR,CTR
"   ld    %r0,16(%r1)\n"
"   mtlr  %r0\n"
"   ld    %r0,8(%r1)\n"
"   mtctr %r0\n"

// return
"   blr"
);
#endif

#else
#  error "Unknown arch"
#endif
/* Run a translation at host address 'translation'.  Return True if
   Vex asked for a translation cache flush as a result. */
Bool run_translation ( HWord translation )
{
   if (0 && DEBUG_TRACE_FLAGS) {
      printf(" run translation %p\n", (void*)translation );
      printf(" simulated bb: %llu\n", n_bbs_done);
   }
   f  = translation;
   gp = (HWord)&gst;
   run_translation_asm();
   gst.GuestPC = res;
   n_bbs_done ++;
   return gp == VEX_TRC_JMP_TINVAL;
}

HWord find_translation ( Addr64 guest_addr )
{
   Int   i;
   HWord __res;
   if (0)
      printf("find translation %p ... ", ULong_to_Ptr(guest_addr));
   for (i = 0; i < trans_table_used; i++)
      if (trans_table[i].base[0] == guest_addr)
         break;
   if (i == trans_table_used) {
      if (0) printf("none\n");
      return 0; /* not found */
   }

   /* Move this translation one step towards the front, so finding it
      next time round is just that little bit cheaper. */
   if (i > 2) {
      VexGuestExtents tmpE = trans_table[i-1];
      ULong*          tmpP = trans_tableP[i-1];
      trans_table[i-1]  = trans_table[i];
      trans_tableP[i-1] = trans_tableP[i];
      trans_table[i]  = tmpE;
      trans_tableP[i] = tmpP;
      i--;
   }

   __res = (HWord)trans_tableP[i];
   if (0) printf("%p\n", (void*)__res);
   return __res;
}

#define N_TRANSBUF 5000
static UChar transbuf[N_TRANSBUF];

void make_translation ( Addr64 guest_addr, Bool verbose )
{
   VexTranslateArgs   vta;
   VexTranslateResult tres;
   VexArchInfo        vex_archinfo;
   Int                trans_used, i, ws_needed;

   if (trans_table_used >= N_TRANS_TABLE
       || trans_cache_used >= N_TRANS_CACHE-1000) {
      /* If things are looking too full, just dump all the translations. */
      trans_cache_used = 0;
      trans_table_used = 0;
   }

   assert(trans_table_used < N_TRANS_TABLE);
   if (0)
      printf("make translation %p\n", ULong_to_Ptr(guest_addr));

   LibVEX_default_VexArchInfo(&vex_archinfo);
   vex_archinfo.subarch = VexSubArch;
   vex_archinfo.ppc_cache_line_szB = CacheLineSize;

   /* */
   vta.arch_guest       = VexArch;
   vta.archinfo_guest   = vex_archinfo;
   vta.arch_host        = VexArch;
   vta.archinfo_host    = vex_archinfo;
   vta.guest_bytes      = (UChar*)ULong_to_Ptr(guest_addr);
   vta.guest_bytes_addr = (Addr64)guest_addr;
   vta.guest_bytes_addr_noredir = (Addr64)guest_addr;
   vta.chase_into_ok    = chase_into_ok;
// vta.guest_extents    = &vge;
   vta.guest_extents    = &trans_table[trans_table_used];
   vta.host_bytes       = transbuf;
   vta.host_bytes_size  = N_TRANSBUF;
   vta.host_bytes_used  = &trans_used;
   vta.instrument1      = NULL;
   vta.instrument2      = NULL;
   vta.do_self_check    = False;
   vta.traceflags       = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
   vta.dispatch         = NULL;

   tres = LibVEX_Translate ( &vta );

   assert(tres == VexTransOK);
   ws_needed = (trans_used+7) / 8;
   assert(ws_needed > 0);
   assert(trans_cache_used + ws_needed < N_TRANS_CACHE);
   n_translations_made++;

   for (i = 0; i < trans_used; i++) {
      HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i;
      HChar* src = (HChar*)(&transbuf[i]);
      *dst = *src;
   }

#if defined(__powerpc__)
   invalidate_icache( &trans_cache[trans_cache_used], trans_used );
#endif

   trans_tableP[trans_table_used] = &trans_cache[trans_cache_used];
   trans_table_used++;
   trans_cache_used += ws_needed;
}
static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge )
{
   Int i;
   for (i = 0; i < vge->n_used; i++) {
      if (vge->base[i]+vge->len[i] <= start
          || vge->base[i] >= start+len) {
         /* ok */
      } else {
         return True;
      }
   }
   return False; /* no overlap */
}

static void dump_translations ( Addr64 start, UInt len )
{
   Int i, j;
   j = 0;
   for (i = 0; i < trans_table_used; i++) {
      if (overlap(start, len, &trans_table[i])) {
         /* do nothing */
      } else {
         assert(j <= i);
         trans_table[j]  = trans_table[i];
         trans_tableP[j] = trans_tableP[i];
         j++;
      }
   }
   assert(j >= 0 && j <= trans_table_used);
   if (0)
      printf("dumped %d translations\n", trans_table_used - j);
   trans_table_used = j;
}


static ULong  stopAfter = 0;
static UChar* entryP    = NULL;


__attribute__ ((noreturn))
static void failure_exit ( void )
{
   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
   fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done);
   exit(1);
}

static void log_bytes ( HChar* bytes, Int nbytes )
{
   fwrite ( bytes, 1, nbytes, stdout );
   fflush ( stdout );
}


/* run simulated code forever (it will exit by calling serviceFn(0)). */
static void run_simulator ( void )
{
   static Addr64 last_guest = 0;
   Addr64 next_guest;
   HWord  next_host;
   Bool   need_inval;

   while (1) {
      next_guest = gst.GuestPC;

      if (0)
         printf("\nnext_guest: 0x%x\n", (UInt)next_guest);

#if defined(__powerpc64__)
      if (next_guest == Ptr_to_ULong( (void*)(*(ULong*)(&serviceFn)) )) {
#else
      if (next_guest == Ptr_to_ULong(&serviceFn)) {
#endif
         /* "do" the function call to serviceFn */
#        if defined(__i386__)
         {
            HWord esp = gst.guest_ESP;
            gst.guest_EIP = *(UInt*)(esp+0);
            gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) );
            gst.guest_ESP = esp+4;
            next_guest = gst.guest_EIP;
         }
#        elif defined(__x86_64__)
         {
            HWord esp = gst.guest_RSP;
            gst.guest_RIP = *(UInt*)(esp+0);
            gst.guest_RAX = serviceFn( gst.guest_RDI, gst.guest_RSI );
            gst.guest_RSP = esp+8;
            next_guest = gst.guest_RIP;
         }
#        elif defined(__powerpc__)
         {
            gst.guest_GPR3 = serviceFn( gst.guest_GPR3, gst.guest_GPR4 );
            gst.guest_CIA  = gst.guest_LR;
            next_guest = gst.guest_CIA;
         }
#        else
#        error "Unknown arch"
#        endif
      }

      next_host = find_translation(next_guest);
      if (next_host == 0) {
         make_translation(next_guest,False);
         next_host = find_translation(next_guest);
         assert(next_host != 0);
      }

      // Switchback
      if (n_bbs_done == stopAfter) {
         printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done);
#if 1
         if (last_guest) {
            printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1);
            make_translation(last_guest,True);
         }
#endif
#if 0
         if (next_guest) {
            printf("\n*** Current translation (bb:%llu):\n", n_bbs_done);
            make_translation(next_guest,True);
         }
#endif
         printf("--- end SWITCHBACK at bb:%llu ---\n", n_bbs_done);
         switchback();
         assert(0); /*NOTREACHED*/
      }

      last_guest = next_guest;
      need_inval = run_translation(next_host);
      if (need_inval) {
#if defined(__powerpc__)
         dump_translations( (Addr64)gst.guest_TISTART, gst.guest_TILEN );
         if (0) printf("dump translations done\n");
#endif
      }
   }
}
static void usage ( void )
{
   printf("usage: switchback #bbs\n");
   printf("   - begins switchback for basic block #bbs\n");
   printf("   - use -1 for largest possible run without switchback\n\n");
   exit(1);
}

#if defined(__powerpc__)

#if !defined(__powerpc64__)   // ppc32
UInt saved_R2;
asm(
"get_R2:\n"
"   lis %r10,saved_R2@ha\n"
"   stw %r2,saved_R2@l(%r10)\n"
"   blr\n"
);

#else   // ppc64
ULong saved_R2;
ULong saved_R13;
asm(
".text\n"
"   .global get_R2\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"get_R2:\n"
"   .quad .get_R2,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .get_R2,@function\n"
"   .global .get_R2\n"
".get_R2:\n"
"   lis    %r10,saved_R2@highest\n"
"   ori    %r10,%r10,saved_R2@higher\n"
"   rldicr %r10,%r10,32,31\n"
"   oris   %r10,%r10,saved_R2@h\n"
"   ori    %r10,%r10,saved_R2@l\n"
"   std    %r2,0(%r10)\n"
"   blr\n"
);
asm(
".text\n"
"   .global get_R13\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"get_R13:\n"
"   .quad .get_R13,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .get_R13,@function\n"
"   .global .get_R13\n"
".get_R13:\n"
"   lis    %r10,saved_R13@highest\n"
"   ori    %r10,%r10,saved_R13@higher\n"
"   rldicr %r10,%r10,32,31\n"
"   oris   %r10,%r10,saved_R13@h\n"
"   ori    %r10,%r10,saved_R13@l\n"
"   std    %r13,0(%r10)\n"
"   blr\n"
);
#endif

extern void get_R2 ( void );
extern void get_R13 ( void );

#endif


int main ( Int argc, HChar** argv )
{
   if (argc != 2)
      usage();

   stopAfter = (ULong)atoll(argv[1]);

   extern void entry ( void*(*service)(int,int) );
   entryP = (UChar*)&entry;

   if (!entryP) {
      printf("switchback: can't find entry point\n");
      exit(1);
   }

   LibVEX_default_VexControl(&vcon);
   vcon.guest_max_insns    = 50;
   vcon.guest_chase_thresh = 0;
   vcon.iropt_level        = 2;

   LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon );
   LibVEX_Guest_initialise(&gst);

   /* set up as if a call to the entry point passing serviceFn as
      the one and only parameter */
#  if defined(__i386__)
   gst.guest_EIP = (UInt)entryP;
   gst.guest_ESP = (UInt)&gstack[25000];
   *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn;
   *(UInt*)(gst.guest_ESP+0) = 0x12345678;

#  elif defined(__x86_64__)
   gst.guest_RIP = (ULong)entryP;
   gst.guest_RSP = (ULong)&gstack[25000];
   gst.guest_RDI = (ULong)serviceFn;
   *(ULong*)(gst.guest_RSP+0) = 0x12345678AABBCCDDULL;

#  elif defined(__powerpc__)
   get_R2();
#  if !defined(__powerpc64__)   // ppc32
   gst.guest_CIA  = (UInt)entryP;
   gst.guest_GPR1 = (UInt)&gstack[25000];   /* stack pointer */
   gst.guest_GPR3 = (UInt)serviceFn;        /* param to entry */
   gst.guest_GPR2 = saved_R2;
   gst.guest_LR   = 0x12345678;             /* bogus return address */
#  else   // ppc64
   get_R13();
   gst.guest_CIA   = * (ULong*)entryP;
   gst.guest_GPR1  = (ULong)&gstack[25000]; /* stack pointer */
   gst.guest_GPR3  = (ULong)serviceFn;      /* param to entry */
   gst.guest_GPR2  = saved_R2;
   gst.guest_GPR13 = saved_R13;
   gst.guest_LR    = 0x1234567812345678ULL; /* bogus return address */
// printf("setting CIA to %p\n", (void*)gst.guest_CIA);
#  endif

#  else
#  error "Unknown arch"
#  endif

   printf("\n---START---\n");

#if 1
   run_simulator();
#else
   ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn);
#endif

   return 0;
}