/* HOW TO USE
13 Dec '05 - Linker no longer used (apart from mymalloc)
Simply compile and link switchback.c and linker.c (the latter is still
needed for mymalloc) with test_xxx.c,
e.g. for ppc64:
$ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c
Test file test_xxx.c must have an entry point called "entry",
which expects to take a single argument which is a function pointer
(to "serviceFn").
Test file may not reference any other symbols.
NOTE: POWERPC: it is critical, when using this on ppc, to set
CacheLineSize to the right value. Values we currently know of:
imac (G3): 32
G5 (ppc970): 128
*/
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "../pub/libvex_basictypes.h"
#include "../pub/libvex_guest_x86.h"
#include "../pub/libvex_guest_amd64.h"
#include "../pub/libvex_guest_ppc32.h"
#include "../pub/libvex_guest_ppc64.h"
#include "../pub/libvex.h"
#include "../pub/libvex_trc_values.h"
#include "linker.h"
/* Statistics: n_bbs_done is bumped once per run_translation() call and
   n_translations_made once per make_translation() call.  Both are
   printed when the guest requests EXIT through serviceFn. */
static ULong n_bbs_done = 0;
static Int n_translations_made = 0;
/* Host-architecture selection.  make_translation() passes VexArch as
   both the guest and the host architecture, so each branch below maps
   the generic names used in the rest of this file onto arch-specific
   LibVEX names:
     VexGuestState            guest register-state struct type (gst)
     LibVEX_Guest_initialise  initialiser called on gst in main()
     VexArch / VexSubArch     values stored into the translate args
     GuestPC                  name of the program-counter field of gst
     CacheLineSize            stored into ppc_cache_line_szB and used
                              by invalidate_icache() on ppc */
#if defined(__i386__)
# define VexGuestState VexGuestX86State
# define LibVEX_Guest_initialise LibVEX_GuestX86_initialise
# define VexArch VexArchX86
# define VexSubArch VexSubArchX86_sse1
# define GuestPC guest_EIP
# define CacheLineSize 0/*irrelevant*/
#elif defined(__x86_64__)
# define VexGuestState VexGuestAMD64State
# define LibVEX_Guest_initialise LibVEX_GuestAMD64_initialise
# define VexArch VexArchAMD64
# define VexSubArch VexSubArch_NONE
# define GuestPC guest_RIP
# define CacheLineSize 0/*irrelevant*/
#elif defined(__powerpc__)
/* Both ppc variants use 128, the G5 (ppc970) value listed in the
   comment at the top of this file; a G3 needs 32 instead. */
#if !defined(__powerpc64__) // ppc32
# define VexGuestState VexGuestPPC32State
# define LibVEX_Guest_initialise LibVEX_GuestPPC32_initialise
# define VexArch VexArchPPC32
# define VexSubArch VexSubArchPPC32_FI
# define GuestPC guest_CIA
# define CacheLineSize 128
#else
# define VexGuestState VexGuestPPC64State
# define LibVEX_Guest_initialise LibVEX_GuestPPC64_initialise
# define VexArch VexArchPPC64
# define VexSubArch VexSubArchPPC64_FI
# define GuestPC guest_CIA
# define CacheLineSize 128
#endif
#else
# error "Unknown arch"
#endif
/* Bit numbers understood in the trace-flags word handed to
   LibVEX_Translate (vta.traceflags):
     7: show conversion into IR
     6: show after initial opt
     5: show after instrumentation
     4: show after second opt
     3: show after tree building
     2: show selected insns
     1: show after reg-alloc
     0: show final assembly
   Both macros are fully parenthesised so that they behave as a single
   value wherever they are expanded (e.g. "TEST_FLAGS & mask"). */
#define TEST_FLAGS ((1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0))
/* Tracing for ordinary (non-verbose) translations; normally off.  To
   enable, use e.g.
   ((1<<7)|(0<<6)|(0<<5)|(0<<4)|(1<<3)|(1<<2)|(1<<1)|(1<<0)) */
#define DEBUG_TRACE_FLAGS (0)
/* guest state */
/* Memory used as the guest's stack: main() points the guest stack
   pointer at &gstack[25000], i.e. the middle of the array. */
UInt gstack[50000];
/* The simulated CPU's register state. */
VexGuestState gst;
/* LibVEX control settings; filled in by main() and passed to LibVEX_Init. */
VexControl vcon;
/* only used for the switchback transition */
/* i386: helper1 = &gst, helper2 = %EFLAGS */
/* amd64: helper1 = &gst, helper2 = %EFLAGS */
/* ppc32 and ppc64: helper1 = &gst, helper2 = %CR, helper3 = %XER */
/* These are plain globals because the switchback_asm stubs load them
   by symbol name. */
HWord sb_helper1 = 0;
HWord sb_helper2 = 0;
HWord sb_helper3 = 0;
/* translation cache */
/* trans_cache holds the generated host code, allocated in units of
   8-byte (ULong) words; trans_cache_used counts words in use.
   trans_table[i] and trans_tableP[i] are parallel: the guest extents
   of translation i and a pointer to its code inside trans_cache.
   trans_table_used is the number of valid entries in both. */
#define N_TRANS_CACHE 1000000
#define N_TRANS_TABLE 10000
ULong trans_cache[N_TRANS_CACHE];
VexGuestExtents trans_table [N_TRANS_TABLE];
ULong* trans_tableP[N_TRANS_TABLE];
Int trans_cache_used = 0;
Int trans_table_used = 0;
/* Callback installed as vta.chase_into_ok by make_translation().  It
   ignores the address it is asked about and always answers False. */
static Bool chase_into_ok ( Addr64 dst )
{
   (void)dst;
   return False;
}
#if 0
// local_sys_write_stderr(&c,1);
/* Debug helper (currently compiled out by the enclosing #if 0).  It
   issues a raw PowerPC "sc" system call with r0 = 4 (__NR_write),
   r3 = 1, r4 = buf and r5 = n.
   NOTE(review): despite the function's name, the file descriptor
   loaded into r3 is 1 (stdout), not 2 (stderr).
   The value left in r3 after the call is copied into __res, which is
   then neither inspected nor returned. */
static void local_sys_write_stderr ( HChar* buf, Int n )
{
UInt __res;
__asm__ volatile (
"li %%r0,4\n\t" /* set %r0 = __NR_write */
"li %%r3,1\n\t" /* set %r3 = 1 (the fd for stdout) */
"mr %%r4,%1\n\t" /* set %r4 = buf */
"mr %%r5,%2\n\t" /* set %r5 = n */
"sc\n\t" /* write(1, buf, n) */
"mr %0,%%r3\n" /* set __res = r3 */
: "=mr" (__res)
: "g" (buf), "g" (n)
: "r0", "r3", "r4", "r5" );
}
#endif
/* For providing services to the simulated program.  The guest is given
   this function's address as the argument to "entry"; run_simulator()
   intercepts guest calls to it and runs it natively.

   arg1 selects the service, arg2 is its operand:
     0 (EXIT)   print statistics and terminate the process with status 0
     1 (PUTC)   write arg2 as a character to stdout; returns 0
     2 (MALLOC) return malloc(arg2) as an HWord
     3 (FREE)   free((void*)arg2); returns 0
   Any other request number is a fatal error. */
static HWord serviceFn ( HWord arg1, HWord arg2 )
{
   switch (arg1) {
      case 0: /* EXIT */
         printf("---STOP---\n");
         printf("serviceFn:EXIT\n");
         printf("%llu bbs simulated\n", n_bbs_done);
         /* trans_cache_used counts 8-byte words, hence the scaling. */
         printf("%d translations made, %d tt bytes\n",
                n_translations_made, 8*trans_cache_used);
         exit(0);
      case 1: /* PUTC */
         putchar(arg2);
         return 0;
      case 2: /* MALLOC */
         return (HWord)malloc(arg2);
      case 3: /* FREE */
         free((void*)arg2);
         return 0;
      default:
         assert(0);
         /* Stay fatal even when NDEBUG compiles the assert away;
            previously control fell off the end of this non-void
            function in that configuration. */
         abort();
   }
}
/* -------------------- */
/* continue execution on the real CPU (never returns) */
extern void switchback_asm(void);
#if defined(__i386__)
/* i386 switchback stub.  Reads the guest state address from sb_helper1
   and the flags word from sb_helper2, switches to the guest stack,
   reloads the integer registers from the guest state and finally
   "ret"s to the continuation address it pushed earlier.
   The numeric displacements are byte offsets into the guest state
   (0 eax, 4 ecx, 8 edx, 12 ebx, 16 esp, 20 ebp, 24 esi, 28 edi,
   56 continuation address).  NOTE(review): they are hard-coded and
   presumably mirror VexGuestX86State -- confirm against
   libvex_guest_x86.h. */
asm(
"switchback_asm:\n"
" movl sb_helper1, %eax\n" // eax = guest state ptr
" movl 16(%eax), %esp\n" // switch stacks
" pushl 56(%eax)\n" // push continuation addr
" movl sb_helper2, %ebx\n" // get eflags
" pushl %ebx\n" // stack (top first): eflags, continuation addr
" pushl 0(%eax)\n" // stack (top first): EAX, eflags, continuation addr
" movl 4(%eax), %ecx\n"
" movl 8(%eax), %edx\n"
" movl 12(%eax), %ebx\n"
" movl 20(%eax), %ebp\n"
" movl 24(%eax), %esi\n"
" movl 28(%eax), %edi\n"
" popl %eax\n" // eax was the base register, so it is restored last
" popfl\n"
" ret\n" // pops the continuation address and jumps to it
);
void switchback ( void )
{
sb_helper1 = (HWord)&gst;
sb_helper2 = LibVEX_GuestX86_get_eflags(&gst);
switchback_asm(); // never returns
}
#elif defined(__x86_64__)
/* amd64 switchback stub.  Same scheme as the i386 one: the guest state
   address comes from sb_helper1 and the flags word from sb_helper2;
   the stub switches to the guest stack, reloads the sixteen integer
   registers and "ret"s to the continuation address it pushed.
   The displacements are byte offsets into the guest state (8 bytes per
   register starting at 0 for rax; 32 is rsp; 168 is the continuation
   address).  NOTE(review): hard-coded, presumably mirroring
   VexGuestAMD64State -- confirm against libvex_guest_amd64.h. */
asm(
"switchback_asm:\n"
" movq sb_helper1, %rax\n" // rax = guest state ptr
" movq 32(%rax), %rsp\n" // switch stacks
" pushq 168(%rax)\n" // push continuation addr
" movq sb_helper2, %rbx\n" // get eflags
" pushq %rbx\n" // stack (top first): eflags, continuation addr
" pushq 0(%rax)\n" // stack (top first): RAX, eflags, continuation addr
" movq 8(%rax), %rcx\n"
" movq 16(%rax), %rdx\n"
" movq 24(%rax), %rbx\n"
" movq 40(%rax), %rbp\n"
" movq 48(%rax), %rsi\n"
" movq 56(%rax), %rdi\n"
" movq 64(%rax), %r8\n"
" movq 72(%rax), %r9\n"
" movq 80(%rax), %r10\n"
" movq 88(%rax), %r11\n"
" movq 96(%rax), %r12\n"
" movq 104(%rax), %r13\n"
" movq 112(%rax), %r14\n"
" movq 120(%rax), %r15\n"
" popq %rax\n" // rax was the base register, so it is restored last
" popfq\n"
" ret\n" // pops the continuation address and jumps to it
);
void switchback ( void )
{
sb_helper1 = (HWord)&gst;
sb_helper2 = LibVEX_GuestAMD64_get_rflags(&gst);
switchback_asm(); // never returns
}
#elif defined(__powerpc__)
/* Make freshly written code at [ptr, ptr+nbytes) safe to execute on
   PowerPC.  Every cache line touching the range (lines are
   CacheLineSize bytes) is first pushed out of the data cache with
   dcbst, and then, after a sync, dropped from the instruction cache
   with icbi.  The final "sync; isync" completes the sequence. */
static void invalidate_icache(void *ptr, int nbytes)
{
   const unsigned long line_sz = CacheLineSize;
   const unsigned long limit   = (unsigned long) ptr + nbytes;
   /* round the start address down to a cache-line boundary */
   const unsigned long first   = (unsigned long) ptr & ~(line_sz - 1);
   unsigned long line;

   line = first;
   while (line < limit) {
      asm volatile("dcbst 0,%0" : : "r" (line));
      line += line_sz;
   }
   asm volatile("sync");

   line = first;
   while (line < limit) {
      asm volatile("icbi 0,%0" : : "r" (line));
      line += line_sz;
   }
   asm volatile("sync; isync");
}
#if !defined(__powerpc64__) // ppc32
/* ppc32 switchback stub.  r31 is loaded with the guest state address
   from sb_helper1; LR and CTR are restored from the guest state, CR
   from sb_helper2 and XER from sb_helper3; then r0..r31 are reloaded
   (r31, the base register, last).
   The stub deliberately does not end in a jump: the five nops between
   nop_start_point and nop_end_point are a patch area.  switchback()
   copies everything from switchback_asm up to nop_end_point and
   overwrites the first nop of the copy with a branch to the guest's
   next instruction.
   NOTE(review): offsets 900 (LR) and 904 (CTR) are hard-coded and
   presumably mirror VexGuestPPC32State -- confirm against
   libvex_guest_ppc32.h. */
asm(
"switchback_asm:\n"
// gst
" lis %r31,sb_helper1@ha\n" // get hi-wd of guest_state_ptr addr
" lwz %r31,sb_helper1@l(%r31)\n" // load word of guest_state_ptr to r31
// LR
" lwz %r3,900(%r31)\n" // guest_LR
" mtlr %r3\n" // move to LR
// CR
" lis %r3,sb_helper2@ha\n" // get hi-wd of flags addr
" lwz %r3,sb_helper2@l(%r3)\n" // load flags word to r3
" mtcr %r3\n" // move r3 to CR
// CTR
" lwz %r3,904(%r31)\n" // guest_CTR
" mtctr %r3\n" // move r3 to CTR
// XER
" lis %r3,sb_helper3@ha\n" // get hi-wd of xer addr
" lwz %r3,sb_helper3@l(%r3)\n" // load xer word to r3
" mtxer %r3\n" // move r3 to XER
// GPR's
" lwz %r0, 0(%r31)\n"
" lwz %r1, 4(%r31)\n" // switch stacks (r1 = SP)
" lwz %r2, 8(%r31)\n"
" lwz %r3, 12(%r31)\n"
" lwz %r4, 16(%r31)\n"
" lwz %r5, 20(%r31)\n"
" lwz %r6, 24(%r31)\n"
" lwz %r7, 28(%r31)\n"
" lwz %r8, 32(%r31)\n"
" lwz %r9, 36(%r31)\n"
" lwz %r10, 40(%r31)\n"
" lwz %r11, 44(%r31)\n"
" lwz %r12, 48(%r31)\n"
" lwz %r13, 52(%r31)\n"
" lwz %r14, 56(%r31)\n"
" lwz %r15, 60(%r31)\n"
" lwz %r16, 64(%r31)\n"
" lwz %r17, 68(%r31)\n"
" lwz %r18, 72(%r31)\n"
" lwz %r19, 76(%r31)\n"
" lwz %r20, 80(%r31)\n"
" lwz %r21, 84(%r31)\n"
" lwz %r22, 88(%r31)\n"
" lwz %r23, 92(%r31)\n"
" lwz %r24, 96(%r31)\n"
" lwz %r25, 100(%r31)\n"
" lwz %r26, 104(%r31)\n"
" lwz %r27, 108(%r31)\n"
" lwz %r28, 112(%r31)\n"
" lwz %r29, 116(%r31)\n"
" lwz %r30, 120(%r31)\n"
" lwz %r31, 124(%r31)\n" // base register is overwritten last
// patch area: the first nop is replaced by a branch at run time
"nop_start_point:\n"
" nop\n"
" nop\n"
" nop\n"
" nop\n"
" nop\n"
"nop_end_point:\n"
);
#else // ppc64
/* ppc64 switchback stub.  Same scheme as the ppc32 one, using 64-bit
   loads and the five-instruction lis/ori/rldicr/oris/ori sequence to
   build each 64-bit symbol address.
   "switchback_asm" itself is emitted into the .opd section as a
   three-doubleword entry (code address, TOC base, 0); the code is
   labelled ".switchback_asm" and also "switchback_asm_undotted", the
   latter being the label switchback() uses as the start of the bytes
   to copy.
   As on ppc32, the five nops between nop_start_point and nop_end_point
   are a patch area: switchback() overwrites the first nop of its copy
   with a branch to the guest's next instruction.
   NOTE(review): offsets 1032 (LR) and 1040 (CTR) are hard-coded and
   presumably mirror VexGuestPPC64State -- confirm against
   libvex_guest_ppc64.h. */
asm(
".text\n"
" .global switchback_asm\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
"switchback_asm:\n"
" .quad .switchback_asm,.TOC.@tocbase,0\n"
" .previous\n"
" .type .switchback_asm,@function\n"
" .global .switchback_asm\n"
".switchback_asm:\n"
"switchback_asm_undotted:\n"
// gst: load word of guest_state_ptr to r31
" lis %r31,sb_helper1@highest\n"
" ori %r31,%r31,sb_helper1@higher\n"
" rldicr %r31,%r31,32,31\n"
" oris %r31,%r31,sb_helper1@h\n"
" ori %r31,%r31,sb_helper1@l\n"
" ld %r31,0(%r31)\n"
// LR
" ld %r3,1032(%r31)\n" // guest_LR
" mtlr %r3\n" // move to LR
// CR
" lis %r3,sb_helper2@highest\n"
" ori %r3,%r3,sb_helper2@higher\n"
" rldicr %r3,%r3,32,31\n"
" oris %r3,%r3,sb_helper2@h\n"
" ori %r3,%r3,sb_helper2@l\n"
" ld %r3,0(%r3)\n" // load flags word to r3
" mtcr %r3\n" // move r3 to CR
// CTR
" ld %r3,1040(%r31)\n" // guest_CTR
" mtctr %r3\n" // move r3 to CTR
// XER
" lis %r3,sb_helper3@highest\n"
" ori %r3,%r3,sb_helper3@higher\n"
" rldicr %r3,%r3,32,31\n"
" oris %r3,%r3,sb_helper3@h\n"
" ori %r3,%r3,sb_helper3@l\n"
" ld %r3,0(%r3)\n" // load xer word to r3
" mtxer %r3\n" // move r3 to XER
// GPR's
" ld %r0, 0(%r31)\n"
" ld %r1, 8(%r31)\n" // switch stacks (r1 = SP)
" ld %r2, 16(%r31)\n"
" ld %r3, 24(%r31)\n"
" ld %r4, 32(%r31)\n"
" ld %r5, 40(%r31)\n"
" ld %r6, 48(%r31)\n"
" ld %r7, 56(%r31)\n"
" ld %r8, 64(%r31)\n"
" ld %r9, 72(%r31)\n"
" ld %r10, 80(%r31)\n"
" ld %r11, 88(%r31)\n"
" ld %r12, 96(%r31)\n"
" ld %r13, 104(%r31)\n"
" ld %r14, 112(%r31)\n"
" ld %r15, 120(%r31)\n"
" ld %r16, 128(%r31)\n"
" ld %r17, 136(%r31)\n"
" ld %r18, 144(%r31)\n"
" ld %r19, 152(%r31)\n"
" ld %r20, 160(%r31)\n"
" ld %r21, 168(%r31)\n"
" ld %r22, 176(%r31)\n"
" ld %r23, 184(%r31)\n"
" ld %r24, 192(%r31)\n"
" ld %r25, 200(%r31)\n"
" ld %r26, 208(%r31)\n"
" ld %r27, 216(%r31)\n"
" ld %r28, 224(%r31)\n"
" ld %r29, 232(%r31)\n"
" ld %r30, 240(%r31)\n"
" ld %r31, 248(%r31)\n" // base register is overwritten last
// patch area: the first nop is replaced by a branch at run time
"nop_start_point:\n"
" nop\n"
" nop\n"
" nop\n"
" nop\n"
" nop\n"
"nop_end_point:\n"
);
#endif
extern void switchback_asm_undotted;
extern void nop_start_point;
extern void nop_end_point;
/* ppc32/ppc64: continue execution on the real CPU at gst.guest_CIA
   (never returns).

   The switchback_asm stub ends in five nops rather than a jump,
   because the destination is only known at run time.  So we:
     1. copy the stub, from its first instruction up to nop_end_point,
        into memory obtained from mymalloc;
     2. overwrite the first nop of the copy with an unconditional
        relative branch ("b", primary opcode 18) to guest_CIA;
     3. publish &gst, CR and XER in sb_helper1..3 for the stub;
     4. flush the copy out of the caches and jump to it.
   The branch displacement is a signed 26-bit byte offset, so the
   target must lie within +/- 0x2000000 bytes of the patched
   instruction; otherwise we give up. */
void switchback ( void )
{
   Int i;
   /* blargh.  Copy the entire switchback_asm procedure into new
      memory on which we can set both write and execute permissions,
      so we can poke around with it and then run the results.
      NOTE(review): this relies on mymalloc (linker.h) returning
      memory that is writable and executable -- confirm. */
#if defined(__powerpc64__) // ppc64
   /* On ppc64 "switchback_asm" names the .opd entry, not the code. */
   UChar* sa_start = (UChar*)&switchback_asm_undotted;
#else // ppc32
   UChar* sa_start = (UChar*)&switchback_asm;
#endif
   UChar* sa_nop_start = (UChar*)&nop_start_point;
   UChar* sa_end = (UChar*)&nop_end_point;
#if 0
   printf("sa_start %p\n", sa_start );
   printf("sa_nop_start %p\n", sa_nop_start);
   printf("sa_end %p\n", sa_end);
#endif
   Int nbytes = sa_end - sa_start;
   Int off_nopstart = sa_nop_start - sa_start;
   if (0)
      printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart);

   /* copy it into mallocville */
   UChar* copy = mymalloc(nbytes);
   assert(copy);
   for (i = 0; i < nbytes; i++)
      copy[i] = sa_start[i];

   /* p addresses the first nop inside the copy.  (This line had been
      corrupted to a literal copyright sign in place of "&copy".) */
   UInt* p = (UInt*)(&copy[off_nopstart]);

#if !defined(__powerpc64__) // ppc32
   Addr32 addr_of_nop = (Addr32)p;
   Addr32 where_to_go = gst.guest_CIA;
   Int diff = ((Int)where_to_go) - ((Int)addr_of_nop);
#if 0
   printf("addr of first nop = 0x%x\n", addr_of_nop);
   printf("where to go = 0x%x\n", where_to_go);
   printf("diff = 0x%x\n", diff);
#endif
#else // ppc64
   Addr64 addr_of_nop = (Addr64)p;
   Addr64 where_to_go = gst.guest_CIA;
   Long diff = ((Long)where_to_go) - ((Long)addr_of_nop);
#if 0
   printf("addr of first nop = 0x%llx\n", addr_of_nop);
   printf("where to go = 0x%llx\n", where_to_go);
   printf("diff = 0x%llx\n", diff);
#endif
#endif

   if (diff < -0x2000000 || diff >= 0x2000000) {
      // we're hosed. Give up
      printf("hosed -- offset too large\n");
      assert(0);
   }

   sb_helper1 = (HWord)&gst;
#if !defined(__powerpc64__) // ppc32
   sb_helper2 = LibVEX_GuestPPC32_get_CR(&gst);
   sb_helper3 = LibVEX_GuestPPC32_get_XER(&gst);
#else // ppc64
   sb_helper2 = LibVEX_GuestPPC64_get_CR(&gst);
   sb_helper3 = LibVEX_GuestPPC64_get_XER(&gst);
#endif

   /* stay sane ... */
   assert(p[0] == 24<<26); /* nop */

   /* branch to diff: opcode 18, 24-bit word displacement, AA=0, LK=0 */
   p[0] = ((18<<26) | (((diff >> 2) & 0xFFFFFF) << 2) | (0<<1) | (0<<0));

   invalidate_icache( copy, nbytes );

#if defined(__powerpc64__)
   //printf("jumping to %p\n", copy);
   /* The call goes through a pointer to faketoc, whose first
      doubleword is the address of the copied code (the same layout
      as the first field of the .opd entries above).  Entries [1] and
      [2] are left uninitialised. */
   { ULong faketoc[3];
     void* v;
     faketoc[0] = (ULong)copy;
     v = &faketoc[0];
     ( (void(*)(void)) v )();
   }
#else
   ( (void(*)(void))copy )();
#endif
}
#else
# error "Unknown arch (switchback)"
#endif
/* -------------------- */
/* Mailboxes shared between run_translation() and the
   run_translation_asm stubs, which access them by symbol name:
     f    in:  host address of the translation to call
     gp   in:  guest state pointer handed to the translation; the ppc
               stubs also store the (possibly modified) pointer
               register back here after the call
     res  out: value the translation returned */
static HWord f, gp, res;
extern void run_translation_asm(void);
#if defined(__i386__)
/* i386: call the translation whose address is in "f", with %ebp
   loaded from "gp", and store what it leaves in %eax into "res".
   pushal/popal preserve the caller's general registers around the
   call.  Note that %ebp is not written back to "gp" here. */
asm(
"run_translation_asm:\n"
" pushal\n"
" movl gp, %ebp\n"
" movl f, %eax\n"
" call *%eax\n"
" movl %eax, res\n"
" popal\n"
" ret\n"
);
#elif defined(__x86_64__)
/* amd64: call the translation whose address is in "f", with %rbp
   loaded from "gp", and store what it leaves in %rax into "res".
   Every general register except %rsp is pushed before the call and
   popped in reverse order afterwards.  Note that %rbp is not written
   back to "gp" here. */
asm(
"run_translation_asm:\n"
" pushq %rax\n"
" pushq %rbx\n"
" pushq %rcx\n"
" pushq %rdx\n"
" pushq %rbp\n"
" pushq %rsi\n"
" pushq %rdi\n"
" pushq %r8\n"
" pushq %r9\n"
" pushq %r10\n"
" pushq %r11\n"
" pushq %r12\n"
" pushq %r13\n"
" pushq %r14\n"
" pushq %r15\n"
" movq gp, %rbp\n"
" movq f, %rax\n"
" call *%rax\n"
" movq %rax, res\n"
" popq %r15\n"
" popq %r14\n"
" popq %r13\n"
" popq %r12\n"
" popq %r11\n"
" popq %r10\n"
" popq %r9\n"
" popq %r8\n"
" popq %rdi\n"
" popq %rsi\n"
" popq %rbp\n"
" popq %rdx\n"
" popq %rcx\n"
" popq %rbx\n"
" popq %rax\n"
" ret\n"
);
#elif defined(__powerpc__)
#if !defined(__powerpc64__) // ppc32
/* ppc32: call the translation whose address is in "f", with r31
   loaded from "gp".  Afterwards r3 is stored into "res" and r31 is
   stored back into "gp".  A 256-byte stack frame holds r13..r31
   across the call; LR is saved at 260(%r1), i.e. 4 bytes above the
   stack pointer the caller had on entry. */
asm(
"run_translation_asm:\n"
// create new stack:
// save old sp at first word & update sp
" stwu 1,-256(1)\n"
// save LR
" mflr %r0\n"
" stw %r0,260(%r1)\n"
// leave hole @ 4(%r1) for a callee to save its LR
// no params
// no need to save non-volatile CR fields
// store registers to stack: just the callee-saved regs
" stw %r13, 8(%r1)\n"
" stw %r14, 12(%r1)\n"
" stw %r15, 16(%r1)\n"
" stw %r16, 20(%r1)\n"
" stw %r17, 24(%r1)\n"
" stw %r18, 28(%r1)\n"
" stw %r19, 32(%r1)\n"
" stw %r20, 36(%r1)\n"
" stw %r21, 40(%r1)\n"
" stw %r22, 44(%r1)\n"
" stw %r23, 48(%r1)\n"
" stw %r24, 52(%r1)\n"
" stw %r25, 56(%r1)\n"
" stw %r26, 60(%r1)\n"
" stw %r27, 64(%r1)\n"
" stw %r28, 68(%r1)\n"
" stw %r29, 72(%r1)\n"
" stw %r30, 76(%r1)\n"
" stw %r31, 80(%r1)\n"
// r31 (guest state ptr) := global var "gp"
" lis %r31,gp@ha\n"
" lwz %r31,gp@l(%r31)\n"
// call translation address in global var "f"
" lis %r4,f@ha\n"
" lwz %r4,f@l(%r4)\n"
" mtctr %r4\n"
" bctrl\n"
// save return value (in r3) into global var "res"
" lis %r5,res@ha\n"
" stw %r3,res@l(%r5)\n"
// save possibly modified guest state ptr (r31) in "gp"
" lis %r5,gp@ha\n"
" stw %r31,gp@l(%r5)\n"
// reload registers from stack
" lwz %r13, 8(%r1)\n"
" lwz %r14, 12(%r1)\n"
" lwz %r15, 16(%r1)\n"
" lwz %r16, 20(%r1)\n"
" lwz %r17, 24(%r1)\n"
" lwz %r18, 28(%r1)\n"
" lwz %r19, 32(%r1)\n"
" lwz %r20, 36(%r1)\n"
" lwz %r21, 40(%r1)\n"
" lwz %r22, 44(%r1)\n"
" lwz %r23, 48(%r1)\n"
" lwz %r24, 52(%r1)\n"
" lwz %r25, 56(%r1)\n"
" lwz %r26, 60(%r1)\n"
" lwz %r27, 64(%r1)\n"
" lwz %r28, 68(%r1)\n"
" lwz %r29, 72(%r1)\n"
" lwz %r30, 76(%r1)\n"
" lwz %r31, 80(%r1)\n"
// restore LR
" lwz %r0,260(%r1)\n"
" mtlr %r0\n"
// restore previous stack pointer
" addi %r1,%r1,256\n"
// return
" blr"
);
#else // ppc64
/* ppc64: call the translation whose address is in "f", with r31
   loaded from "gp".  Afterwards r3 is stored into "res" and r31 is
   stored back into "gp".
   "run_translation_asm" is emitted into .opd as a three-doubleword
   entry (code address, TOC base, 0); the code itself is labelled
   ".run_translation_asm".
   LR and CTR are saved at 16(%r1) and 8(%r1) relative to the caller's
   stack pointer before the 256-byte frame is created; r13..r31 are
   kept in the new frame from offset 48 upwards. */
asm(
".text\n"
" .global run_translation_asm\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
"run_translation_asm:\n"
" .quad .run_translation_asm,.TOC.@tocbase,0\n"
" .previous\n"
" .type .run_translation_asm,@function\n"
" .global .run_translation_asm\n"
".run_translation_asm:\n"
// save LR,CTR
" mflr %r0\n"
" std %r0,16(%r1)\n"
" mfctr %r0\n"
" std %r0,8(%r1)\n"
// create new stack:
// save old sp at first word & update sp
" stdu 1,-256(1)\n"
// frame offsets below 48 are not written here
// (NOTE(review): the original remark "leave hole @ 4(%r1) for a callee
//  to save its LR" was inherited from the ppc32 version)
// no params
// no need to save non-volatile CR fields
// store registers to stack: just the callee-saved regs
" std %r13, 48(%r1)\n"
" std %r14, 56(%r1)\n"
" std %r15, 64(%r1)\n"
" std %r16, 72(%r1)\n"
" std %r17, 80(%r1)\n"
" std %r18, 88(%r1)\n"
" std %r19, 96(%r1)\n"
" std %r20, 104(%r1)\n"
" std %r21, 112(%r1)\n"
" std %r22, 120(%r1)\n"
" std %r23, 128(%r1)\n"
" std %r24, 136(%r1)\n"
" std %r25, 144(%r1)\n"
" std %r26, 152(%r1)\n"
" std %r27, 160(%r1)\n"
" std %r28, 168(%r1)\n"
" std %r29, 176(%r1)\n"
" std %r30, 184(%r1)\n"
" std %r31, 192(%r1)\n"
// r31 (guest state ptr) := global var "gp"
" lis %r31,gp@highest\n"
" ori %r31,%r31,gp@higher\n"
" rldicr %r31,%r31,32,31\n"
" oris %r31,%r31,gp@h\n"
" ori %r31,%r31,gp@l\n"
" ld %r31,0(%r31)\n"
// call translation address in global var "f"
" lis %r4,f@highest\n"
" ori %r4,%r4,f@higher\n"
" rldicr %r4,%r4,32,31\n"
" oris %r4,%r4,f@h\n"
" ori %r4,%r4,f@l\n"
" ld %r4,0(%r4)\n"
" mtctr %r4\n"
" bctrl\n"
// save return value (in r3) into global var "res"
" lis %r5,res@highest\n"
" ori %r5,%r5,res@higher\n"
" rldicr %r5,%r5,32,31\n"
" oris %r5,%r5,res@h\n"
" ori %r5,%r5,res@l\n"
" std %r3,0(%r5)\n"
// save possibly modified guest state ptr (r31) in "gp"
" lis %r5,gp@highest\n"
" ori %r5,%r5,gp@higher\n"
" rldicr %r5,%r5,32,31\n"
" oris %r5,%r5,gp@h\n"
" ori %r5,%r5,gp@l\n"
" std %r31,0(%r5)\n"
// reload registers from stack
" ld %r13, 48(%r1)\n"
" ld %r14, 56(%r1)\n"
" ld %r15, 64(%r1)\n"
" ld %r16, 72(%r1)\n"
" ld %r17, 80(%r1)\n"
" ld %r18, 88(%r1)\n"
" ld %r19, 96(%r1)\n"
" ld %r20, 104(%r1)\n"
" ld %r21, 112(%r1)\n"
" ld %r22, 120(%r1)\n"
" ld %r23, 128(%r1)\n"
" ld %r24, 136(%r1)\n"
" ld %r25, 144(%r1)\n"
" ld %r26, 152(%r1)\n"
" ld %r27, 160(%r1)\n"
" ld %r28, 168(%r1)\n"
" ld %r29, 176(%r1)\n"
" ld %r30, 184(%r1)\n"
" ld %r31, 192(%r1)\n"
// restore previous stack pointer
" addi %r1,%r1,256\n"
// restore LR,CTR
" ld %r0,16(%r1)\n"
" mtlr %r0\n"
" ld %r0,8(%r1)\n"
" mtctr %r0\n"
// return
" blr"
);
#endif
#else
# error "Unknown arch"
#endif
/* Run the translation located at host address 'translation'.

   The address and the guest state pointer are passed to the assembly
   stub through the globals "f" and "gp"; the value the translation
   returns comes back in "res" and becomes the new guest program
   counter.  Returns True if Vex asked for a translation cache flush,
   which is signalled by "gp" holding VEX_TRC_JMP_TINVAL afterwards.
   NOTE(review): only the ppc stubs write the pointer register back to
   "gp", so on i386/amd64 this comparison sees the &gst stored below. */
Bool run_translation ( HWord translation )
{
   Bool flush_requested;

   if (0 && DEBUG_TRACE_FLAGS) {
      printf(" run translation %p\n", (void*)translation );
      printf(" simulated bb: %llu\n", n_bbs_done);
   }

   /* load the mailboxes, then go */
   gp = (HWord)&gst;
   f  = translation;
   run_translation_asm();

   /* collect the results */
   n_bbs_done++;
   gst.GuestPC = res;
   flush_requested = (gp == VEX_TRC_JMP_TINVAL);
   return flush_requested;
}
/* Look up the translation whose first guest extent starts at
   guest_addr.  Returns the host address of its code, or 0 if there is
   no such translation.  As a side effect a hit at index 3 or beyond
   is swapped with its predecessor, so frequently used translations
   drift towards the front of the (linearly searched) table. */
HWord find_translation ( Addr64 guest_addr )
{
   Int   slot = 0;
   HWord host_addr;

   if (0)
      printf("find translation %p ... ", ULong_to_Ptr(guest_addr));

   /* linear search */
   while (slot < trans_table_used
          && trans_table[slot].base[0] != guest_addr)
      slot++;

   if (slot == trans_table_used) {
      if (0) printf("none\n");
      return 0; /* not found */
   }

   /* Move this translation one step towards the front, so finding it
      next time round is just that little bit cheaper. */
   if (slot > 2) {
      const Int       prev   = slot - 1;
      VexGuestExtents foundE = trans_table[slot];
      ULong*          foundP = trans_tableP[slot];
      trans_table[slot]  = trans_table[prev];
      trans_tableP[slot] = trans_tableP[prev];
      trans_table[prev]  = foundE;
      trans_tableP[prev] = foundP;
      slot = prev;
   }

   host_addr = (HWord)trans_tableP[slot];
   if (0) printf("%p\n", (void*)host_addr);
   return host_addr;
}
/* Scratch buffer that LibVEX_Translate emits host code into, before
   the code is copied to its final place in trans_cache. */
#define N_TRANSBUF 5000
static UChar transbuf[N_TRANSBUF];

/* Translate the guest code at guest_addr and append the result to the
   translation table and cache.  'verbose' selects TEST_FLAGS instead
   of DEBUG_TRACE_FLAGS as the trace setting for this translation.
   If either the table is full or the cache has fewer than 1000 words
   left, all existing translations are discarded first. */
void make_translation ( Addr64 guest_addr, Bool verbose )
{
   VexTranslateArgs   vta;
   VexTranslateResult tres;
   VexArchInfo        vex_archinfo;
   Int                trans_used, i, ws_needed;

   if (trans_table_used >= N_TRANS_TABLE
       || trans_cache_used >= N_TRANS_CACHE-1000) {
      /* If things are looking too full, just dump
         all the translations. */
      trans_cache_used = 0;
      trans_table_used = 0;
   }
   assert(trans_table_used < N_TRANS_TABLE);

   if (0)
      printf("make translation %p\n", ULong_to_Ptr(guest_addr));

   LibVEX_default_VexArchInfo(&vex_archinfo);
   vex_archinfo.subarch = VexSubArch;
   vex_archinfo.ppc_cache_line_szB = CacheLineSize;

   /* guest and host are the same architecture */
   vta.arch_guest     = VexArch;
   vta.archinfo_guest = vex_archinfo;
   vta.arch_host      = VexArch;
   vta.archinfo_host  = vex_archinfo;
   /* the guest code is read in place, at its own address */
   vta.guest_bytes              = (UChar*)ULong_to_Ptr(guest_addr);
   vta.guest_bytes_addr         = (Addr64)guest_addr;
   vta.guest_bytes_addr_noredir = (Addr64)guest_addr;
   vta.chase_into_ok            = chase_into_ok;
   /* extents go straight into the table slot being filled */
   vta.guest_extents   = &trans_table[trans_table_used];
   vta.host_bytes      = transbuf;
   vta.host_bytes_size = N_TRANSBUF;
   vta.host_bytes_used = &trans_used;
   vta.instrument1   = NULL;
   vta.instrument2   = NULL;
   vta.do_self_check = False;
   vta.traceflags    = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
   vta.dispatch      = NULL;

   tres = LibVEX_Translate ( &vta );
   assert(tres == VexTransOK);

   /* the cache is allocated in 8-byte words: round up */
   ws_needed = (trans_used+7) / 8;
   assert(ws_needed > 0);
   assert(trans_cache_used + ws_needed < N_TRANS_CACHE);
   n_translations_made++;

   /* copy the generated code to the end of the cache */
   {
      HChar* dst = (HChar*)(&trans_cache[trans_cache_used]);
      HChar* src = (HChar*)(&transbuf[0]);
      for (i = 0; i < trans_used; i++)
         dst[i] = src[i];
   }

#if defined(__powerpc__)
   invalidate_icache( &trans_cache[trans_cache_used], trans_used );
#endif

   trans_tableP[trans_table_used] = &trans_cache[trans_cache_used];
   trans_table_used++;
   trans_cache_used += ws_needed;
}
/* Does the guest address range [start, start+len) intersect any of
   the vge->n_used extents recorded in *vge?  Ranges that merely touch
   end-to-start do not count as intersecting. */
static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge )
{
   Int k = 0;
   while (k < vge->n_used) {
      /* extent k intersects iff it ends after 'start' and begins
         before the end of the range */
      if (!(vge->base[k]+vge->len[k] <= start)
          && !(vge->base[k] >= start+len))
         return True;
      k++;
   }
   return False; /* no overlap */
}
/* Discard every translation whose guest extents intersect
   [start, start+len).  The survivors are compacted towards the front
   of trans_table/trans_tableP, keeping their relative order.  Only the
   table shrinks: trans_cache_used is left alone, so the code bytes of
   discarded translations are not reclaimed here. */
static void dump_translations ( Addr64 start, UInt len )
{
   Int src;
   Int kept = 0;

   for (src = 0; src < trans_table_used; src++) {
      if (overlap(start, len, &trans_table[src]))
         continue; /* drop it */
      assert(kept <= src);
      trans_table[kept]  = trans_table[src];
      trans_tableP[kept] = trans_tableP[src];
      kept++;
   }

   assert(kept >= 0 && kept <= trans_table_used);
   if (0) printf("dumped %d translations\n", trans_table_used - kept);
   trans_table_used = kept;
}
/* Basic-block count at which run_simulator() performs the switchback;
   set by main() from the command-line argument. */
static ULong stopAfter = 0;
/* Address of the test program's "entry" symbol; set by main(). */
static UChar* entryP = NULL;
/* Handed to LibVEX_Init by main() as its first argument.  Reports the
   failure and the number of basic blocks run so far on stdout, then
   terminates the process with status 1. */
__attribute__ ((noreturn))
static
void failure_exit ( void )
{
   printf("VEX did failure_exit. Bye.\n");
   printf("bb counter = %llu\n\n", n_bbs_done);
   exit(1);
}
/* Handed to LibVEX_Init by main() as its second argument.  Writes the
   nbytes bytes at 'bytes' to stdout and flushes immediately. */
static
void log_bytes ( HChar* bytes, Int nbytes )
{
   FILE* out = stdout;
   fwrite ( bytes, 1, nbytes, out );
   fflush ( out );
}
/* run simulated code forever (it will exit by calling
serviceFn(0)). */
static void run_simulator ( void )
{
static Addr64 last_guest = 0;
Addr64 next_guest;
HWord next_host;
Bool need_inval;
while (1) {
next_guest = gst.GuestPC;
if (0)
printf("\nnext_guest: 0x%x\n", (UInt)next_guest);
#if defined(__powerpc64__)
if (next_guest == Ptr_to_ULong( (void*)(*(ULong*)(&serviceFn)) )) {
#else
if (next_guest == Ptr_to_ULong(&serviceFn)) {
#endif
/* "do" the function call to serviceFn */
# if defined(__i386__)
{
HWord esp = gst.guest_ESP;
gst.guest_EIP = *(UInt*)(esp+0);
gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) );
gst.guest_ESP = esp+4;
next_guest = gst.guest_EIP;
}
# elif defined(__x86_64__)
{
HWord esp = gst.guest_RSP;
gst.guest_RIP = *(UInt*)(esp+0);
gst.guest_RAX = serviceFn( gst.guest_RDI, gst.guest_RSI );
gst.guest_RSP = esp+8;
next_guest = gst.guest_RIP;
}
# elif defined(__powerpc__)
{
gst.guest_GPR3 = serviceFn( gst.guest_GPR3, gst.guest_GPR4 );
gst.guest_CIA = gst.guest_LR;
next_guest = gst.guest_CIA;
}
# else
# error "Unknown arch"
# endif
}
next_host = find_translation(next_guest);
if (next_host == 0) {
make_translation(next_guest,False);
next_host = find_translation(next_guest);
assert(next_host != 0);
}
// Switchback
if (n_bbs_done == stopAfter) {
printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done);
#if 1
if (last_guest) {
printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1);
make_translation(last_guest,True);
}
#endif
#if 0
if (next_guest) {
printf("\n*** Current translation (bb:%llu):\n", n_bbs_done);
make_translation(next_guest,True);
}
#endif
printf("--- end SWITCHBACK at bb:%llu ---\n", n_bbs_done);
switchback();
assert(0); /*NOTREACHED*/
}
last_guest = next_guest;
need_inval = run_translation(next_host);
if (need_inval) {
#if defined(__powerpc__)
dump_translations( (Addr64)gst.guest_TISTART, gst.guest_TILEN );
if (0) printf("dump translations done\n");
#endif
}
}
}
/* Print the command-line synopsis on stdout and terminate the process
   with status 1. */
static void usage ( void )
{
   fputs("usage: switchback #bbs\n", stdout);
   fputs(" - begins switchback for basic block #bbs\n", stdout);
   fputs(" - use -1 for largest possible run without switchback\n\n", stdout);
   exit(1);
}
#if defined(__powerpc__)
#if !defined(__powerpc64__) // ppc32
/* ppc32: get_R2() stores the current value of r2 into saved_R2 (using
   r10 as scratch) and returns.  main() copies saved_R2 into the
   guest's GPR2. */
UInt saved_R2;
asm(
"get_R2:\n"
" lis %r10,saved_R2@ha\n"
" stw %r2,saved_R2@l(%r10)\n"
" blr\n"
);
#else // ppc64
/* ppc64: get_R2() stores the current value of r2 into saved_R2, and
   get_R13() stores r13 into saved_R13; both use r10 as scratch.
   main() copies the saved values into the guest's GPR2 and GPR13.
   Each function has an .opd entry under its plain name, with the code
   under the dot-prefixed label. */
ULong saved_R2;
ULong saved_R13;
asm(
".text\n"
" .global get_R2\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
"get_R2:\n"
" .quad .get_R2,.TOC.@tocbase,0\n"
" .previous\n"
" .type .get_R2,@function\n"
" .global .get_R2\n"
".get_R2:\n"
// r10 := &saved_R2 (64-bit address built in five steps)
" lis %r10,saved_R2@highest\n"
" ori %r10,%r10,saved_R2@higher\n"
" rldicr %r10,%r10,32,31\n"
" oris %r10,%r10,saved_R2@h\n"
" ori %r10,%r10,saved_R2@l\n"
" std %r2,0(%r10)\n"
" blr\n"
);
asm(
".text\n"
" .global get_R13\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
"get_R13:\n"
" .quad .get_R13,.TOC.@tocbase,0\n"
" .previous\n"
" .type .get_R13,@function\n"
" .global .get_R13\n"
".get_R13:\n"
// r10 := &saved_R13 (64-bit address built in five steps)
" lis %r10,saved_R13@highest\n"
" ori %r10,%r10,saved_R13@higher\n"
" rldicr %r10,%r10,32,31\n"
" oris %r10,%r10,saved_R13@h\n"
" ori %r10,%r10,saved_R13@l\n"
" std %r13,0(%r10)\n"
" blr\n"
);
#endif
extern void get_R2 ( void );
extern void get_R13 ( void );
#endif
/* Program entry.  Takes exactly one argument, the basic-block count at
   which to switch back to the real CPU (parsed with atoll and stored
   as an unsigned value, so -1 becomes the largest possible count).
   Initialises LibVEX, sets the guest state up as though "entry" had
   just been called with serviceFn as its only argument, and starts
   the simulator. */
int main ( Int argc, HChar** argv )
{
   /* the test program's entry point; resolved at link time */
   extern void entry ( void*(*service)(int,int) );

   if (argc != 2)
      usage();

   stopAfter = (ULong)atoll(argv[1]);

   entryP = (UChar*)&entry;
   if (entryP == NULL) {
      printf("switchback: can't find entry point\n");
      exit(1);
   }

   /* LibVEX settings: start from the defaults, then override */
   LibVEX_default_VexControl(&vcon);
   vcon.guest_max_insns    = 50;
   vcon.guest_chase_thresh = 0;
   vcon.iropt_level        = 2;

   LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon );
   LibVEX_Guest_initialise(&gst);

   /* set up as if a call to the entry point passing serviceFn as
      the one and only parameter */
#  if defined(__i386__)
   /* argument on the stack, just above a bogus return address */
   gst.guest_EIP = (UInt)entryP;
   gst.guest_ESP = (UInt)&gstack[25000];
   *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn;
   *(UInt*)(gst.guest_ESP+0) = 0x12345678;
#  elif defined(__x86_64__)
   /* argument in RDI; bogus return address on the stack */
   gst.guest_RIP = (ULong)entryP;
   gst.guest_RSP = (ULong)&gstack[25000];
   gst.guest_RDI = (ULong)serviceFn;
   *(ULong*)(gst.guest_RSP+0) = 0x12345678AABBCCDDULL;
#  elif defined(__powerpc__)
   get_R2();
#if !defined(__powerpc64__) // ppc32
   gst.guest_CIA = (UInt)entryP;
   gst.guest_GPR1 = (UInt)&gstack[25000]; /* stack pointer */
   gst.guest_GPR3 = (UInt)serviceFn; /* param to entry */
   gst.guest_GPR2 = saved_R2;
   gst.guest_LR = 0x12345678; /* bogus return address */
#else // ppc64
   get_R13();
   /* the code address is the first doubleword at entryP */
   gst.guest_CIA = * (ULong*)entryP;
   gst.guest_GPR1 = (ULong)&gstack[25000]; /* stack pointer */
   gst.guest_GPR3 = (ULong)serviceFn; /* param to entry */
   gst.guest_GPR2 = saved_R2;
   gst.guest_GPR13 = saved_R13;
   gst.guest_LR = 0x1234567812345678ULL; /* bogus return address */
   // printf("setting CIA to %p\n", (void*)gst.guest_CIA);
#endif
#  else
#  error "Unknown arch"
#  endif

   printf("\n---START---\n");

#if 1
   run_simulator();
#else
   /* native run of the test program, bypassing the simulator */
   ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn);
#endif

   return 0;
}