# Plain text  |  401 lines  |  12.37 KB

#!/usr/bin/env python2
##########################################################################
# 
# Copyright 2008 VMware, Inc.
# All Rights Reserved.
# 
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sub license, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
# 
# The above copyright notice and this permission notice (including the
# next paragraph) shall be included in all copies or substantial portions
# of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# 
##########################################################################


import sys
import xml.parsers.expat
import optparse

from model import *


# Token kinds: the values stored in XmlToken.type by XmlTokenizer.
ELEMENT_START = 0
ELEMENT_END = 1
CHARACTER_DATA = 2
EOF = 3


class XmlToken:
    """One token of an XML stream.

    Attributes:
        type: one of ELEMENT_START, ELEMENT_END, CHARACTER_DATA or EOF.
        name_or_data: the element name for start/end tokens, the text for
            character data tokens.
        attrs: the attributes given for the token, or None.
        line, column: position recorded for the token, or None.
    """

    def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
        assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF)
        self.type, self.name_or_data = type, name_or_data
        self.attrs = attrs
        self.line, self.column = line, column

    def __str__(self):
        """Return a short human readable rendering, as used in error messages."""
        kind, payload = self.type, self.name_or_data
        if kind == CHARACTER_DATA:
            return payload
        if kind == EOF:
            return 'end of file'
        if kind == ELEMENT_START:
            return ''.join(('<', payload, ' ...>'))
        if kind == ELEMENT_END:
            return ''.join(('</', payload, '>'))
        # Every valid token kind was handled above.
        assert 0


class XmlTokenizer:
    """Expat based XML tokenizer.

    The input is read in fixed-size chunks and fed to an expat parser; the
    parser callbacks turn each chunk into a list of XmlToken objects which
    next() hands out one at a time.

    Arguments:
        fp: object with a read(size) method returning the XML text.
        skip_ws: when true, character data consisting only of whitespace
            produces no token.
    """

    # Numeric expat error code for "no element found"
    # (XML_ERROR_NO_ELEMENTS).  The identically named attribute of
    # xml.parsers.expat.errors is the message string, not the number, so
    # ExpatError.code has to be compared against the number itself.
    _NO_ELEMENTS_CODE = 3

    def __init__(self, fp, skip_ws = True):
        self.fp = fp
        self.tokens = []    # tokens produced from the chunk parsed last
        self.index = 0      # index in self.tokens of the next token to return
        self.final = False  # set once the last chunk has been read
        self.skip_ws = skip_ws

        # Text collected since the last element boundary, and the position
        # at which that text started.
        self.character_pos = 0, 0
        self.character_data = ''

        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.StartElementHandler  = self.handle_element_start
        self.parser.EndElementHandler    = self.handle_element_end
        self.parser.CharacterDataHandler = self.handle_character_data

    def handle_element_start(self, name, attributes):
        """Expat callback: flush pending text, then queue an ELEMENT_START token."""
        self.finish_character_data()
        line, column = self.pos()
        token = XmlToken(ELEMENT_START, name, attributes, line, column)
        self.tokens.append(token)

    def handle_element_end(self, name):
        """Expat callback: flush pending text, then queue an ELEMENT_END token."""
        self.finish_character_data()
        line, column = self.pos()
        token = XmlToken(ELEMENT_END, name, None, line, column)
        self.tokens.append(token)

    def handle_character_data(self, data):
        """Expat callback: accumulate text until the next element boundary."""
        if not self.character_data:
            # First piece of a new run of text: remember where it starts.
            self.character_pos = self.pos()
        self.character_data += data

    def finish_character_data(self):
        """Queue the accumulated text as one CHARACTER_DATA token and reset it.

        Whitespace-only text is dropped when skip_ws is set.
        """
        if self.character_data:
            if not self.skip_ws or not self.character_data.isspace():
                line, column = self.character_pos
                token = XmlToken(CHARACTER_DATA, self.character_data, None, line, column)
                self.tokens.append(token)
            self.character_data = ''

    def next(self):
        """Return the next token; an EOF token once the input is exhausted.

        Raises:
            xml.parsers.expat.ExpatError: for any parse error other than
                "no element found".
        """
        size = 16*1024
        while self.index >= len(self.tokens) and not self.final:
            self.tokens = []
            self.index = 0
            data = self.fp.read(size)
            # A short read is taken to mean that the end of input was reached.
            self.final = len(data) < size
            # Trailing NUL characters are stripped before parsing
            # (presumably padding in some trace files -- TODO confirm).
            data = data.rstrip('\0')
            try:
                self.parser.Parse(data, self.final)
            except xml.parsers.expat.ExpatError as e:
                # "no element found" is tolerated, presumably so that
                # empty or truncated traces can still be read; everything
                # else is re-raised with its original traceback.
                if e.code != self._NO_ELEMENTS_CODE:
                    raise
        if self.index >= len(self.tokens):
            line, column = self.pos()
            token = XmlToken(EOF, None, None, line, column)
        else:
            token = self.tokens[self.index]
            self.index += 1
        return token

    def pos(self):
        """Return the parser's current (line, column) position."""
        return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber


class TokenMismatch(Exception):
    """Raised when the parser finds a token other than the one it needs.

    Attributes:
        expected: description of what was required; it is rendered with
            str(), so a token or a plain string both work.
        found: the token actually seen; its line and column attributes are
            used to prefix the message.
    """

    def __init__(self, expected, found):
        self.found = found
        self.expected = expected

    def __str__(self):
        """Format the error as 'line:column: X expected, Y found'."""
        where = '%u:%u' % (self.found.line, self.found.column)
        what = '%s expected, %s found' % (str(self.expected), str(self.found))
        return where + ': ' + what



class XmlParser:
    """Base XML document parser.

    Wraps an XmlTokenizer and keeps one token of lookahead in self.token.
    The first token is read as soon as the parser is constructed.
    """

    def __init__(self, fp):
        self.tokenizer = XmlTokenizer(fp)
        self.consume()

    def consume(self):
        """Replace the lookahead token with the next one from the tokenizer."""
        self.token = self.tokenizer.next()

    def match_element_start(self, name):
        """Tell whether the lookahead token is the start tag of element name."""
        current = self.token
        return current.type == ELEMENT_START and current.name_or_data == name

    def match_element_end(self, name):
        """Tell whether the lookahead token is the end tag of element name."""
        current = self.token
        return current.type == ELEMENT_END and current.name_or_data == name

    def element_start(self, name):
        """Consume the start tag of element name and return its attributes.

        Character data in front of the tag is skipped.  Raises TokenMismatch
        if the next token is not that start tag.
        """
        while self.token.type == CHARACTER_DATA:
            self.consume()
        current = self.token
        if current.type != ELEMENT_START or current.name_or_data != name:
            raise TokenMismatch(XmlToken(ELEMENT_START, name), current)
        attrs = current.attrs
        self.consume()
        return attrs

    def element_end(self, name):
        """Consume the end tag of element name.

        Character data in front of the tag is skipped.  Raises TokenMismatch
        if the next token is not that end tag.
        """
        while self.token.type == CHARACTER_DATA:
            self.consume()
        current = self.token
        if current.type != ELEMENT_END or current.name_or_data != name:
            raise TokenMismatch(XmlToken(ELEMENT_END, name), current)
        self.consume()

    def character_data(self, strip = True):
        """Consume consecutive character data tokens and return their text.

        The result is the empty string when the lookahead token is not
        character data.  Surrounding whitespace is removed unless strip is
        false.
        """
        pieces = []
        while self.token.type == CHARACTER_DATA:
            pieces.append(self.token.name_or_data)
            self.consume()
        text = ''.join(pieces)
        return text.strip() if strip else text


class TraceParser(XmlParser):
    """Recursive descent parser for XML traces.

    A trace is a <trace> element holding <call> elements.  Each top-level
    call is turned into a Call object and passed to handle_call(), which
    subclasses override to do something with it.
    """

    def __init__(self, fp):
        XmlParser.__init__(self, fp)
        # Number of the call parsed last; used to number calls that carry
        # no 'no' attribute.
        self.last_call_no = 0

    def parse(self):
        """Parse the whole trace, calling handle_call() for every call.

        A trace that ends without its closing </trace> tag is accepted.
        """
        self.element_start('trace')
        stop_kinds = (ELEMENT_END, EOF)
        while self.token.type not in stop_kinds:
            self.handle_call(self.parse_call())
        if self.token.type == EOF:
            return
        self.element_end('trace')

    def parse_call(self):
        """Parse one <call> element and return it as a Call.

        Nested <call> elements are parsed but their result is discarded.
        """
        attrs = self.element_start('call')
        if 'no' in attrs:
            no = int(attrs['no'])
            self.last_call_no = no
        else:
            # Unnumbered call: continue counting from the previous one.
            self.last_call_no += 1
            no = self.last_call_no
        klass = attrs['class']
        method = attrs['method']

        arguments = []
        result = None
        duration = None
        while self.token.type == ELEMENT_START:
            tag = self.token.name_or_data
            if tag == 'arg':
                arguments.append(self.parse_arg())
            elif tag == 'ret':
                result = self.parse_ret()
            elif tag == 'call':
                # ignore nested function calls
                self.parse_call()
            elif tag == 'time':
                duration = self.parse_time()
            else:
                raise TokenMismatch("<arg ...> or <ret ...>", self.token)
        self.element_end('call')

        return Call(no, klass, method, arguments, result, duration)

    def parse_arg(self):
        """Parse an <arg> element into a (name, value) pair."""
        name = self.element_start('arg')['name']
        value = self.parse_value()
        self.element_end('arg')
        return name, value

    def parse_ret(self):
        """Parse a <ret> element and return the value it holds."""
        self.element_start('ret')
        value = self.parse_value()
        self.element_end('ret')
        return value

    def parse_time(self):
        """Parse a <time> element and return the value it holds."""
        self.element_start('time')
        value = self.parse_value()
        self.element_end('time')
        return value

    def parse_value(self):
        """Parse any value element by dispatching to parse_<element name>().

        Raises TokenMismatch if the lookahead token is not the start tag of
        one of the known value elements.
        """
        expected_tokens = ('null', 'bool', 'int', 'uint', 'float', 'string', 'enum', 'array', 'struct', 'ptr', 'bytes')
        current = self.token
        if current.type != ELEMENT_START or current.name_or_data not in expected_tokens:
            raise TokenMismatch(" or " .join(expected_tokens), current)
        return getattr(self, 'parse_' + current.name_or_data)()

    def _parse_scalar(self, tag, convert = None):
        """Parse <tag>text</tag>; return the text, run through convert if given.

        The conversion happens before the end tag is consumed.
        """
        self.element_start(tag)
        text = self.character_data()
        value = text if convert is None else convert(text)
        self.element_end(tag)
        return value

    def parse_null(self):
        """Parse a <null> element into Literal(None)."""
        self.element_start('null')
        self.element_end('null')
        return Literal(None)

    def parse_bool(self):
        """Parse a <bool> element; its text is converted with int()."""
        return Literal(self._parse_scalar('bool', int))

    def parse_int(self):
        """Parse an <int> element into an integer Literal."""
        return Literal(self._parse_scalar('int', int))

    def parse_uint(self):
        """Parse a <uint> element into an integer Literal."""
        return Literal(self._parse_scalar('uint', int))

    def parse_float(self):
        """Parse a <float> element into a float Literal."""
        return Literal(self._parse_scalar('float', float))

    def parse_enum(self):
        """Parse an <enum> element into a NamedConstant."""
        return NamedConstant(self._parse_scalar('enum'))

    def parse_string(self):
        """Parse a <string> element into a string Literal."""
        return Literal(self._parse_scalar('string'))

    def parse_bytes(self):
        """Parse a <bytes> element into a Blob holding its text."""
        return Blob(self._parse_scalar('bytes'))

    def parse_array(self):
        """Parse an <array> element into an Array of its <elem> values."""
        self.element_start('array')
        items = []
        while self.token.type != ELEMENT_END:
            items.append(self.parse_elem())
        self.element_end('array')
        return Array(items)

    def parse_elem(self):
        """Parse an <elem> element and return the value it holds."""
        self.element_start('elem')
        value = self.parse_value()
        self.element_end('elem')
        return value

    def parse_struct(self):
        """Parse a <struct> element into a Struct of (name, value) members."""
        name = self.element_start('struct')['name']
        fields = []
        while self.token.type != ELEMENT_END:
            fields.append(self.parse_member())
        self.element_end('struct')
        return Struct(name, fields)

    def parse_member(self):
        """Parse a <member> element into a (name, value) pair."""
        name = self.element_start('member')['name']
        value = self.parse_value()
        self.element_end('member')
        return name, value

    def parse_ptr(self):
        """Parse a <ptr> element into a Pointer holding its text."""
        return Pointer(self._parse_scalar('ptr'))

    def handle_call(self, call):
        """Hook called by parse() for every top-level call; does nothing here."""
        pass
    
    
class TraceDumper(TraceParser):
    """Trace parser that pretty-prints every call it parses.

    Arguments:
        fp: object the trace is read from.
        outStream: object handed to the formatter as its output.
    """

    def __init__(self, fp, outStream = sys.stdout):
        TraceParser.__init__(self, fp)
        formatter = format.DefaultFormatter(outStream)
        self.formatter = formatter
        self.pretty_printer = PrettyPrinter(formatter)

    def handle_call(self, call):
        """Print the call through the pretty printer, followed by a newline."""
        call.visit(self.pretty_printer)
        self.formatter.newline()
        

class Main:
    '''Common main class for all retrace command line utilities.

    main() opens every trace named on the command line and passes it to
    process_arg(); get_optparser() and process_arg() are the methods meant
    to be overridden to add options or to change the processing.
    '''

    def __init__(self):
        pass

    def main(self):
        '''Parse the command line and process every trace file named on it.

        Files ending in .gz or .bz2 are opened through the matching
        decompressor.  Exits through optparser.error() when no file is given.
        '''
        optparser = self.get_optparser()
        (options, args) = optparser.parse_args(sys.argv[1:])

        if not args:
            optparser.error('insufficient number of arguments')

        for arg in args:
            if arg.endswith('.gz'):
                from gzip import GzipFile
                stream = GzipFile(arg, 'rt')
            elif arg.endswith('.bz2'):
                from bz2 import BZ2File
                stream = BZ2File(arg, 'rU')
            else:
                stream = open(arg, 'rt')
            # Close each file as soon as it has been processed, even when
            # processing fails, rather than leaving it open until exit.
            try:
                self.process_arg(stream, options)
            finally:
                stream.close()

    def get_optparser(self):
        '''Return the option parser used by main().'''
        optparser = optparse.OptionParser(
            usage="\n\t%prog [options] TRACE  [...]")
        return optparser

    def process_arg(self, stream, options):
        '''Process one opened trace; by default dump it with TraceDumper.'''
        parser = TraceDumper(stream)
        parser.parse()


# Script entry point: when run directly, process the trace files named on
# the command line with the default Main class.
if __name__ == '__main__':
    Main().main()