#!/usr/bin/env python2

import argparse
import os
import pipes
import re
import sys

from utils import FindBaseNaCl, GetObjcopyCmd, get_sfi_string, shellcmd

def NewerThanOrNotThere(old_path, new_path):
    """Returns whether old_path is newer than new_path.

    Also returns True if either path doesn't exist (or cannot be stat'ed).

    Args:
        old_path: Path whose modification time is the left-hand side of
            the comparison.
        new_path: Path whose modification time is the right-hand side of
            the comparison.

    Returns:
        True if either path is missing, or if old_path's modification time
        is strictly greater than new_path's; False otherwise (including
        when both timestamps are equal).
    """
    try:
        # Ask for the timestamps directly instead of probing with
        # os.path.exists() first: a file deleted between the probe and
        # the getmtime() call would otherwise raise instead of being
        # reported as "not there".
        old_mtime = os.path.getmtime(old_path)
        new_mtime = os.path.getmtime(new_path)
    except OSError:
        return True
    return old_mtime > new_mtime

def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    Args:
        patterns: List of pattern strings.  A pattern starting with a digit
            or ':' is a python-style range ('n', 'a:b', 'a:' or ':b') into
            syms; anything else is used as a regex.
        syms: List of symbol names that ranges index into.

    Returns:
        A regex string in which every pattern is anchored as a whole
        ('^(?:...)$') and the patterns are joined with '|'.  With no
        patterns the result is '^$'.

    Raises:
        SystemExit: with status 1, after writing a message to stderr, if a
            range has more than one ':' or a non-integer bound.
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so that an empty pattern is treated as
        # an (empty) regex rather than raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and bounds that are
                # not integers (e.g. '1x').
                sys.stderr.write(
                    'Invalid range syntax: {p}\n'.format(p=pattern))
                sys.exit(1)
            # Symbol names are matched literally, so escape them.
            pattern = '|'.join(re.escape(p) for p in syms[lower:upper])
        # The non-capturing group makes the anchors apply to the whole
        # pattern even when it contains a top-level '|'; without it
        # 'foo|bar' would become '^foo|bar$' and match e.g. 'foox'.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'

def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decide whether a symbol is selected by the include/exclude rules.

    Args:
        sym: Symbol name to test.
        re_include: Compiled regex of symbols to include.
        re_exclude: Compiled regex of symbols to exclude.
        default_match: Value returned when neither regex matches.

    Returns:
        False if re_exclude matches (exclusion takes precedence and the
        include regex is not consulted), True if only re_include matches,
        and default_match otherwise.
    """
    excluded = re_exclude.match(sym)
    if not excluded:
        # No explicit exclusion: an include match wins, else fall back to
        # the caller-supplied default.
        return True if re_include.match(sym) else default_match
    return False

def AddOptionalArgs(argparser):
    """Register this script's optional command-line flags on argparser.

    Args:
        argparser: The argparse parser to extend.  It is modified in place;
            nothing is returned.
    """
    add = argparser.add_argument

    def add_switch(options, dest, help_text):
        # Boolean flag: False unless given on the command line.
        add(*options, dest=dest, action='store_true', help=help_text)

    def add_repeatable(options, dest, help_text):
        # Flag that may be given several times; values accumulate in a list.
        add(*options, dest=dest, action='append', default=[], help=help_text)

    add('--force', dest='force', type=int, choices=[0, 1], default=1,
        help='Force all re-translations of the pexe. Default %(default)s.')
    add_repeatable(('--include', '-i'), 'include',
                   'Subzero symbols to include (regex or line range)')
    add_repeatable(('--exclude', '-e'), 'exclude',
                   'Subzero symbols to exclude (regex or line range)')
    add('--output', '-o', dest='output', action='store', default='a.out',
        help='Output executable. Default %(default)s.')
    add('-O', dest='optlevel', default='2',
        choices=['m1', '-1', '0', '1', '2'],
        help=('Optimization level (m1 and -1 are equivalent).'
              ' Default %(default)s.'))
    add('--filetype', dest='filetype', default='iasm',
        choices=['obj', 'asm', 'iasm'],
        help='Output file type.  Default %(default)s.')
    add_switch(('--sandbox',), 'sandbox',
               'Enable sandboxing in the translator')
    add_switch(('--nonsfi',), 'nonsfi',
               'Enable Non-SFI in the translator')
    add_switch(('--enable-block-profile',), 'enable_block_profile',
               'Enable basic block profiling.')
    add('--target', dest='target', default='x8632',
        choices=['arm32', 'x8632', 'x8664'],
        help='Generate code for specified target.')
    add_switch(('--verbose', '-v'), 'verbose',
               'Display some extra debugging output')
    add_repeatable(('--sz',), 'sz_args', 'Extra arguments for Subzero')
    add_repeatable(('--llc',), 'llc_args', 'Extra arguments for llc')
    add_switch(('--no-sz',), 'nosz', 'Run only post-Subzero build steps')
    add_switch(('--fsanitize-address',), 'asan',
               'Instrument with AddressSanitizer')

def LinkSandbox(objs, exe, target, verbose=True):
    """Link objs into the sandboxed executable exe with le32-nacl-ld.gold.

    Args:
        objs: List of object file paths to link.
        exe: Path of the output executable.
        target: One of 'x8632', 'x8664' or 'arm32'; selects the translator
            library directory and the Subzero runtime object.
        verbose: Passed to shellcmd as its echo argument.
    """
    assert target in ('x8632', 'x8664', 'arm32'), \
        '-sandbox is not available for %s' % target
    nacl_root = FindBaseNaCl()
    toolchain = '{root}/toolchain/linux_x86/pnacl_newlib_raw'.format(
        root=nacl_root)
    gold = toolchain + '/bin/le32-nacl-ld.gold'
    lib_subdirs = {
      'arm32': 'arm',
      'x8632': 'x86-32',
      'x8664': 'x86-64',
    }
    linklib = '{toolchain}/translator/{subdir}/lib'.format(
        toolchain=toolchain, subdir=lib_subdirs[target])
    runtime_obj = ('{root}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_sb_{target}.o').format(root=nacl_root, target=target)

    # Unlike LinkNonsfi, '-z noexecstack', '-pie' and '--defsym=_begin=0'
    # are not passed here; the link is '-static' instead.
    leading_args = [
        gold,
        '-nostdlib',
        '--no-fix-cortex-a8',
        '--eh-frame-hdr',
        '-z', 'text',
        '--build-id',
        '--entry=__pnacl_start',
        '-static',
        linklib + '/crtbegin.o',
    ]
    trailing_args = [
        runtime_obj,
        linklib + '/libpnacl_irt_shim_dummy.a',
        '--start-group',
        linklib + '/libgcc.a',
        linklib + '/libcrt_platform.a',
        '--end-group',
        linklib + '/crtend.o',
        '--undefined=_start',
        '--defsym=__Sz_AbsoluteZero=0',
        '-o', exe,
    ]
    # The caller's objects go between crtbegin.o and the runtime/libraries.
    shellcmd(leading_args + objs + trailing_args, echo=verbose)

def LinkNonsfi(objs, exe, target, verbose=True):
    """Link objs into the Non-SFI executable exe with le32-nacl-ld.gold.

    Args:
        objs: List of object file paths to link.
        exe: Path of the output executable.
        target: 'arm32' or 'x8632'; any other value raises KeyError because
            it has no entry in the library-directory table.
        verbose: Passed to shellcmd as its echo argument.
    """
    nacl_root = FindBaseNaCl()
    toolchain = '{root}/toolchain/linux_x86/pnacl_newlib_raw'.format(
        root=nacl_root)
    gold = toolchain + '/bin/le32-nacl-ld.gold'
    lib_subdirs = {
      'arm32': 'arm-nonsfi',
      'x8632': 'x86-32-nonsfi',
    }
    linklib = '{toolchain}/translator/{subdir}/lib'.format(
        toolchain=toolchain, subdir=lib_subdirs[target])
    runtime_obj = ('{root}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_nonsfi_{target}.o').format(root=nacl_root,
                                                    target=target)

    leading_args = [
        gold,
        '-nostdlib',
        '--no-fix-cortex-a8',
        '--eh-frame-hdr',
        '-z', 'text',
        '-z', 'noexecstack',
        '--build-id',
        '--entry=__pnacl_start',
        '-pie',
        linklib + '/crtbegin.o',
    ]
    trailing_args = [
        runtime_obj,
        linklib + '/libpnacl_irt_shim_dummy.a',
        '--start-group',
        linklib + '/libgcc.a',
        linklib + '/libcrt_platform.a',
        '--end-group',
        linklib + '/crtend.o',
        '--undefined=_start',
        '--defsym=__Sz_AbsoluteZero=0',
        '--defsym=_begin=0',
        '-o', exe,
    ]
    # The caller's objects go between crtbegin.o and the runtime/libraries.
    shellcmd(leading_args + objs + trailing_args, echo=verbose)

def LinkNative(objs, exe, target, verbose=True):
    """Link objs into the native executable exe using a compiler driver.

    Args:
        objs: List of object file paths to link.
        exe: Path of the output executable.
        target: 'arm32', 'x8632' or 'x8664'.  'mips32' has a linker entry
            but no flags or library directory, so it raises KeyError.
        verbose: Passed to shellcmd as its echo argument.
    """
    nacl_root = FindBaseNaCl()
    clang = ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
            ).format(root=nacl_root)

    # Per-target lookup tables; each is indexed by target below.
    linkers = {
      'arm32': '/usr/bin/arm-linux-gnueabihf-g++',
      'mips32': '/usr/bin/mipsel-linux-gnu-g++',
      'x8632': clang,
      'x8664': clang,
    }
    target_flags = {
      'arm32': ['-mcpu=cortex-a9'],
      'x8632': ['-m32'],
      'x8664': ['-mx32'],
    }
    lib_dirs = {
      'arm32': 'arm-linux',
      'x8632': 'x86-32-linux',
      'x8664': 'x86-64-linux',
    }
    linker = linkers[target]
    extra_linker_args = target_flags[target]
    lib_dir = lib_dirs[target]

    # The doubled braces survive format() as a literal '{a,b,c}.o' group.
    irt_objs = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/'
                '{lib_dir}/lib/'
                '{{unsandboxed_irt,irt_random,irt_query_list}}.o'
               ).format(root=nacl_root, lib_dir=lib_dir)
    runtime_obj = ('{root}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_native_{target}.o').format(root=nacl_root,
                                                    target=target)
    trailing_args = [
        '-o', exe,
        irt_objs,
        runtime_obj,
        '-lm', '-lpthread', '-lrt',
        '-Wl,--defsym=__Sz_AbsoluteZero=0',
    ]
    shellcmd([linker] + extra_linker_args + objs + trailing_args,
             echo=verbose)

def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a hybrid of Subzero
    and llc translated bitcode.  Linker tricks are used to determine whether
    Subzero or llc generated symbols are used, on a per-symbol basis.

    By default, for every symbol, its Subzero version is used.  Subzero and llc
    symbols can be selectively enabled/disabled via regular expressions on the
    symbol name, or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the symbol is
    'rejected' on a match), followed by the --include arguments (the symbol is
    'accepted' on a match), followed by unconditional 'rejection'.  The Subzero
    version is used for an 'accepted' symbol, and the llc version is used for a
    'rejected' symbol.

    Each --include and --exclude argument can be a regular expression or a range
    of lines in the symbol file.  Each regular expression is wrapped inside
    '^$', so if you want a substring match on 'foo', use '.*foo.*' instead.
    Ranges use python-style 'first:last' notation, so e.g. use '0:10' or ':10'
    for the first 10 lines of the file, or '1' for the second line of the file.

    If no --include or --exclude arguments are given, the executable is produced
    entirely using Subzero, without using llc or linker tricks.

    When using the --force=0 option, this script uses file modification
    timestamps to determine whether llc and Subzero re-translation are needed.
    It checks timestamps of llc, pnacl-sz, and the pexe against the translated
    object files to determine the minimal work necessary.  The --force=1 option
    (default) suppresses those checks and re-translates everything.

    This script expects various PNaCl and LLVM tools to be found within the
    native_client tree.  When changes are made to these tools, copy them this
    way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\
      --install=toolchain/linux_x86/pnacl_newlib_raw
    """
    # The docstring above doubles as the --help description.  It is shown
    # verbatim (RawTextHelpFormatter), with four spaces prepended so that
    # its first line is indented like the rest.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description='    ' + main.__doc__)
    AddOptionalArgs(parser)
    parser.add_argument('pexe', help='Finalized pexe to translate')
    options = parser.parse_args()
    # Translate the positional pexe into the executable named by --output.
    ProcessPexe(options, options.pexe, options.output)

def ProcessPexe(args, pexe, exe):
    """Translate pexe with Subzero (optionally mixed with llc) and link exe.

    Intermediate files are written alongside the pexe; their names are the
    pexe path (minus a trailing '.pexe', if any) plus suffixes such as
    '.sz.o', '.llc.o', '.sym.sz.txt' and '.wl.sz.txt'.

    Args:
        args: Parsed command-line namespace.  The attributes read here are
            target, optlevel, filetype, force, include, exclude, sandbox,
            nonsfi, asan, nosz, verbose, enable_block_profile, sz_args and
            llc_args.  args.sz_args may be appended to when args.asan is set.
        pexe: Path of the input pexe.
        exe: Path of the executable to produce.

    Exits the process with status 1 if args.asan is combined with
    args.sandbox or args.nonsfi.
    """
    # Strip a trailing '.pexe' to form the base name of the intermediate
    # files; a path with any other extension is used whole.
    [pexe_base, ext] = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Keep an unquoted copy for Python's own open() calls below; the quoted
    # forms are the ones substituted into command strings.
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)

    nacl_root = FindBaseNaCl()
    path_addition = (
        '{root}/toolchain/linux_x86/pnacl_newlib_raw/bin'
        ).format(root=nacl_root)
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = obj_sz  # overridden for hybrid mode
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'
    pnacl_sz = (
        '{root}/toolchain_build/src/subzero/pnacl-sz'
        ).format(root=nacl_root)
    llcbin = '{base}/pnacl-llc'.format(base=path_addition)
    # NOTE(review): gold is assigned but not used in this function.
    gold = '{base}/le32-nacl-ld.gold'.format(base=path_addition)
    objcopy = '{base}/{objcopy}'.format(base=path_addition,
                                        objcopy=GetObjcopyCmd(args.target))
    opt_level = args.optlevel
    # Maps this script's -O value to the -O level given to pnacl-translate;
    # pnacl-sz receives the unmapped value.
    opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
    # Hybrid mode is selected by giving any --include or --exclude argument.
    hybrid = args.include or args.exclude
    native = not args.sandbox and not args.nonsfi
    if args.asan:
        if args.sandbox or args.nonsfi:
            print 'Can only use AddressSanitizer with a native build'
            exit(1)
        # Make sure Subzero is told to instrument, without duplicating the
        # flag if the user already passed it via --sz.
        if '-fsanitize-address' not in args.sz_args:
          args.sz_args.append('-fsanitize-address')

    # Step 1 (hybrid only): translate the pexe with llc, unless --force=0
    # and the llc object is newer than both the pexe and the llc binary.
    # NOTE(review): pexe and obj_llc here are the pipes.quote()d strings;
    # for a path that actually needs quoting the timestamp check would look
    # at the quoted name -- confirm whether that is intended.
    if hybrid and (args.force or
                   NewerThanOrNotThere(pexe, obj_llc) or
                   NewerThanOrNotThere(llcbin, obj_llc)):
        # get_sfi_string comes from utils; presumably it picks one of its
        # three string arguments for sandbox / nonsfi / native builds --
        # TODO confirm against utils.
        arch = {
          'arm32': 'arm' + get_sfi_string(args, 'v7', '-nonsfi', '-nonsfi'),
          'x8632': 'x86-32' + get_sfi_string(args, '', '-nonsfi', '-linux'),
          'x8664': 'x86-64' + get_sfi_string(args, '', '', '-linux')
        }[args.target]

        # Only run pnacl-translate in hybrid mode.
        shellcmd(['{base}/pnacl-translate'.format(base=path_addition),
                  '-split-module=1',
                  '-ffunction-sections',
                  '-fdata-sections',
                  '-c',
                  '-arch',  arch,
                  '-O' + opt_level_map[opt_level],
                  '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize',
                  '-o', obj_llc] +
                 (['--pnacl-driver-verbose'] if args.verbose else []) +
                 args.llc_args +
                 [pexe],
                 echo=args.verbose)
        # Native builds rename _start to _user_start in the llc object.
        if native:
            shellcmd((
                '{objcopy} --redefine-sym _start=_user_start {obj}'
                ).format(objcopy=objcopy, obj=obj_llc), echo=args.verbose)
        # Generate llc syms file for consistency, even though it's not used.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)

    # Step 2: translate the pexe with Subzero, unless --force=0 and the
    # Subzero object is newer than both the pexe and the pnacl-sz binary.
    if (args.force or
        NewerThanOrNotThere(pexe, obj_sz) or
        NewerThanOrNotThere(pnacl_sz, obj_sz)):
        # --no-sz skips only the pnacl-sz invocation itself; the assembling
        # and objcopy/nm steps below still run.
        if not args.nosz:
            # Run pnacl-sz regardless of hybrid mode.
            shellcmd([pnacl_sz,
                      '-O' + opt_level,
                      '-bitcode-format=pnacl',
                      '-filetype=' + args.filetype,
                      '-o', obj_sz if args.filetype == 'obj' else asm_sz,
                      '-target=' + args.target] +
                     (['-externalize',
                       '-ffunction-sections',
                       '-fdata-sections'] if hybrid else []) +
                     (['-sandbox'] if args.sandbox else []) +
                     (['-nonsfi'] if args.nonsfi else []) +
                     (['-enable-block-profile'] if
                          args.enable_block_profile and not args.sandbox
                          else []) +
                     args.sz_args +
                     [pexe],
                     echo=args.verbose)
        # For the non-'obj' file types pnacl-sz wrote assembly to asm_sz, so
        # assemble it into obj_sz with llvm-mc.
        if args.filetype != 'obj':
            triple = {
              'arm32': 'arm' + get_sfi_string(args, '-nacl', '', ''),
              'x8632': 'i686' + get_sfi_string(args, '-nacl', '', ''),
              'x8664': 'x86_64' +
                        get_sfi_string(args, '-nacl', '-linux-gnux32',
                                       '-linux-gnux32'),
            }[args.target]

            shellcmd((
                '{base}/llvm-mc -triple={triple} -filetype=obj -o {obj} {asm}'
                ).format(base=path_addition, asm=asm_sz, obj=obj_sz,
                         triple=triple),
                     echo=args.verbose)
        # Native builds rename _start to _user_start in the Subzero object.
        if native:
            shellcmd((
                '{objcopy} --redefine-sym _start=_user_start {obj}'
                ).format(objcopy=objcopy, obj=obj_sz), echo=args.verbose)
        # Write the Subzero symbol list; the ranges accepted by
        # --include/--exclude index into the lines of this file.
        if hybrid:
            shellcmd((
                'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
                ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)

    # Step 3 (hybrid only): combine the Subzero and llc objects into one
    # relocatable object, choosing per symbol via the whitelist.
    if hybrid:
        with open(sym_sz_unescaped) as f:
            sz_syms = f.read().splitlines()
        re_include_str = BuildRegex(args.include, sz_syms)
        re_exclude_str = BuildRegex(args.exclude, sz_syms)
        re_include = re.compile(re_include_str)
        re_exclude = re.compile(re_exclude_str)
        # If a symbol doesn't explicitly match re_include or re_exclude,
        # the default MatchSymbol() result is True, unless some --include
        # args are provided.
        default_match = not args.include

        # The whitelist holds every Subzero symbol accepted by MatchSymbol.
        whitelist_has_items = False
        with open(whitelist_sz_unescaped, 'w') as f:
            for sym in sz_syms:
                if MatchSymbol(sym, re_include, re_exclude, default_match):
                    f.write(sym + '\n')
                    whitelist_has_items = True
        # Run objcopy --weaken on the whole Subzero object, and
        # --weaken-symbols=<whitelist> on the llc object.
        # NOTE(review): presumably this makes the llc definition win for
        # non-whitelisted symbols and the Subzero one (listed first in the
        # partial link below) win for whitelisted ones -- confirm.
        shellcmd((
            '{objcopy} --weaken {obj} {weak}'
            ).format(objcopy=objcopy, obj=obj_sz, weak=obj_sz_weak),
            echo=args.verbose)
        if whitelist_has_items:
            # objcopy returns an error if the --weaken-symbols file is empty.
            shellcmd((
                '{objcopy} --weaken-symbols={whitelist} {obj} {weak}'
                ).format(objcopy=objcopy,
                         whitelist=whitelist_sz, obj=obj_llc,
                         weak=obj_llc_weak),
                     echo=args.verbose)
        else:
            # Empty whitelist: just copy the llc object unchanged.
            shellcmd((
                '{objcopy} {obj} {weak}'
                ).format(objcopy=objcopy, obj=obj_llc, weak=obj_llc_weak),
                echo=args.verbose)
        obj_partial = pexe_base + '.o'
        ld = {
          'arm32': 'arm-linux-gnueabihf-ld',
          'x8632': 'ld',
          'x8664': 'ld',
        }[args.target]
        emulation = {
          'arm32': 'armelf_linux_eabi',
          'x8632': 'elf_i386',
          'x8664': 'elf32_x86_64' if not args.sandbox else 'elf_x86_64',
        }[args.target]
        # Partial link (ld -r) of the two weakened objects into obj_partial.
        shellcmd((
            '{ld} -r -m {emulation} -o {partial} {sz} {llc}'
            ).format(ld=ld, emulation=emulation, partial=obj_partial,
                     sz=obj_sz_weak, llc=obj_llc_weak),
                 echo=args.verbose)
        # Localize every symbol in the combined object, then re-globalize
        # only the entry point and __Sz_block_profile_info.
        shellcmd((
            '{objcopy} -w --localize-symbol="*" {partial}'
            ).format(objcopy=objcopy, partial=obj_partial),
            echo=args.verbose)
        shellcmd((
            '{objcopy} --globalize-symbol={start} ' +
            '--globalize-symbol=__Sz_block_profile_info {partial}'
            ).format(objcopy=objcopy, partial=obj_partial,
                     start=get_sfi_string(args, '_start', '_start',
                                          '_user_start')),
                 echo=args.verbose)

    # Run the linker regardless of hybrid mode.
    if args.sandbox:
        LinkSandbox([obj_partial], exe, args.target, args.verbose)
    elif args.nonsfi:
        LinkNonsfi([obj_partial], exe, args.target, args.verbose)
    else:
        objs = [obj_partial]
        # AddressSanitizer builds also link the szrt_asan runtime object.
        if args.asan:
            objs.append(
                ('{root}/toolchain_build/src/subzero/build/runtime/' +
                 'szrt_asan_{target}.o').format(root=nacl_root,
                                                target=args.target))
        LinkNative(objs, exe, args.target, args.verbose)

    # Put the extra verbose printing at the end.
    # The names printed here are only bound in the hybrid branch above,
    # hence the 'and hybrid' guard.
    if args.verbose and hybrid:
        print 'include={regex}'.format(regex=re_include_str)
        print 'exclude={regex}'.format(regex=re_exclude_str)
        print 'default_match={dm}'.format(dm=default_match)
        print 'Number of Subzero syms = {num}'.format(num=len(sz_syms))

# Run the translation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()