# Plain text  |  358 lines  |  9.8 KB

#!/usr/bin/env python
##########################################################################
#
# Copyright 2011 Jose Fonseca
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
##########################################################################/


import json
import optparse
import re
import difflib
import sys


def strip_object_hook(obj):
    '''JSON ``object_hook`` that drops tagged objects and dunder members.

    An object containing a ``__class__`` member is replaced by ``None``
    (``load`` installs this hook when ``strip_images`` is set, so these are
    presumably image blobs -- confirm against the producer of the JSON).
    For every other object, members whose name both starts and ends with a
    double underscore are removed in place.

    Returns ``None`` or the same (mutated) ``obj``.
    '''
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating its live key view raises RuntimeError on Python 3.
    for name in list(obj):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj


class Visitor:
    '''Base class for walking a decoded JSON tree.

    ``visit`` picks one of the three ``visit*`` hooks from the type of the
    node; the hooks here do nothing and are meant to be overridden.
    '''

    def visit(self, node, *args, **kwargs):
        '''Dispatch ``node`` to the hook for its type and return its result.

        Dicts go to ``visitObject``, lists to ``visitArray`` and anything
        else to ``visitValue``.  Extra arguments are forwarded unchanged.
        '''
        if isinstance(node, dict):
            handler = self.visitObject
        elif isinstance(node, list):
            handler = self.visitArray
        else:
            handler = self.visitValue
        return handler(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        '''Hook for dict nodes; the default does nothing.'''
        return None

    def visitArray(self, node, *args, **kwargs):
        '''Hook for list nodes; the default does nothing.'''
        return None

    def visitValue(self, node, *args, **kwargs):
        '''Hook for every other node; the default does nothing.'''
        return None


class Dumper(Visitor):
    '''Visitor that pretty-prints a decoded JSON tree to a stream.

    Object members are written in sorted name order as ``name: value``
    with the name unquoted, so the output is JSON-like rather than strict
    JSON.  Nesting is shown with two spaces per level.
    '''

    def __init__(self, stream = sys.stdout):
        # stream: any object with a write(str) method.
        self.stream = stream
        # Current nesting depth, used by _indent().
        self.level = 0

    def _write(self, s):
        '''Write the string ``s`` to the output stream.'''
        self.stream.write(s)

    def _indent(self):
        '''Write the indentation for the current nesting level.'''
        self._write('  '*self.level)

    def _newline(self):
        '''Terminate the current output line.'''
        self._write('\n')

    def visitObject(self, node):
        '''Dump the dict ``node``, one member per line, sorted by name.'''
        self.enter_object()

        # sorted() accepts both the key list of Python 2 and the key view
        # of Python 3; a view has no .sort() method.
        members = sorted(node.keys())
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        '''Open an object and increase the nesting level.'''
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        '''Write the indented ``name: `` prefix of an object member.'''
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        '''Finish a member line; a comma is written unless ``last`` is true.'''
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        '''Close an object; a newline follows only at the outermost level.'''
        self.level -= 1
        self._indent()
        self._write('}')
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        '''Dump the list ``node``, one element per line.'''
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        '''Open an array and increase the nesting level.'''
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        '''Close an array (no trailing newline is written).'''
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        '''Dump a scalar using its JSON encoding (NaN/Infinity allowed).'''
        self._write(json.dumps(node, allow_nan=True))



class Comparer(Visitor):
    '''Visitor that tells whether two decoded JSON trees are equivalent.

    ``visit(a, b)`` returns ``True`` or ``False``.  Floating point values
    are compared with a tolerance; everything else must be equal.
    '''

    def __init__(self, ignore_added = False, tolerance = 2.0 ** -24):
        # ignore_added: when true, object members present only in ``b``
        # do not make the objects differ.
        self.ignore_added = ignore_added
        # tolerance: bound on the relative error between two floats (used
        # as an absolute bound when the reference value is zero).
        self.tolerance = tolerance

    def visitObject(self, a, b):
        '''Compare the dict ``a`` against ``b`` member by member.'''
        if not isinstance(b, dict):
            return False
        # sorted() works on Python 2 key lists and Python 3 key views alike.
        ak = sorted(a.keys())
        if not self.ignore_added:
            # Cheap size check first, then require identical member names.
            if len(a) != len(b):
                return False
            if ak != sorted(b.keys()):
                return False
        for k in ak:
            # Only reachable with ignore_added: a member missing from b.
            if k not in b:
                return False
            if not self.visit(a[k], b[k]):
                return False
        return True

    def visitArray(self, a, b):
        '''Compare the list ``a`` against ``b`` element by element.'''
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        for ae, be in zip(a, b):
            if not self.visit(ae, be):
                return False
        return True

    def visitValue(self, a, b):
        '''Compare two scalars, allowing ``self.tolerance`` on floats.'''
        if not (isinstance(a, float) or isinstance(b, float)):
            return a == b
        # Exact matches (including equal infinities) need no arithmetic.
        if a == b:
            return True
        try:
            if a == 0:
                return abs(b) < self.tolerance
            return abs((b - a)/a) < self.tolerance
        except TypeError:
            # One side is not a number (e.g. None for a missing member, a
            # string, or a container): the values differ, do not crash.
            return False


class Differ(Visitor):
    '''Visitor that writes the differences between two decoded JSON trees.

    Equivalence is decided by a ``Comparer``; output is produced through a
    ``Dumper``.  Changed scalars are written as ``old -> new``; multi-line
    strings are written as a line-by-line ``difflib`` comparison.
    '''

    # ``basestring`` exists only on Python 2; fall back to ``str`` so the
    # string checks below also work on Python 3.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self, stream = sys.stdout, ignore_added = False):
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added = ignore_added)

    def visit(self, a, b):
        '''Write the difference between ``a`` and ``b``, if there is any.'''
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        '''Write the members of dict ``a`` that differ from those of ``b``.'''
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a.keys())
        if not self.comparer.ignore_added:
            names.update(b.keys())
        names = sorted(names)

        # NOTE: "last" is judged against all candidate names, not only the
        # differing ones, so a comma is still written after the final
        # reported member when the names after it compare equal.
        last_index = len(names) - 1
        for i, name in enumerate(names):
            # A member missing on one side is treated as None.
            ae = a.get(name)
            be = b.get(name)
            if not self.comparer.visit(ae, be):
                self.dumper.enter_member(name)
                self.visit(ae, be)
                self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        '''Write list ``a`` element by element, diffing changed elements.'''
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            # Positions past the end of the shorter list count as None.
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                # Unchanged elements are still dumped in full.
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        '''Write a scalar replacement when ``a`` and ``b`` are unequal.'''
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        '''Write ``a`` being replaced by ``b``.

        When both are strings and at least one spans several lines, a
        ``difflib.Differ`` line comparison is written; otherwise the output
        is ``<a> -> <b>``.
        '''
        if isinstance(a, self._string_types) and isinstance(b, self._string_types):
            if '\n' in a or '\n' in b:
                a = a.splitlines()
                b = b.splitlines()
                differ = difflib.Differ()
                result = differ.compare(a, b)
                self.dumper.level += 1
                for entry in result:
                    self.dumper._newline()
                    self.dumper._indent()
                    # Each entry starts with a two-character difflib tag.
                    tag = entry[:2]
                    text = entry[2:]
                    if tag == '? ':
                        # Hint line: keep it aligned under the quoted text
                        # of the line above, without quotes of its own.
                        tag = '  '
                        prefix = ' '
                        text = text.rstrip()
                        suffix = ''
                    else:
                        prefix = '"'
                        suffix = '\\n"'
                    line = tag + prefix + text + suffix
                    self.dumper._write(line)
                self.dumper.level -= 1
                return
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)

    def isMultilineString(self, value):
        '''Return whether ``value`` is a string containing a newline.

        Not called by the other methods of this class.
        '''
        return isinstance(value, self._string_types) and '\n' in value

    def replaceMultilineString(self, a, b):
        '''Write ``<a> -> <b>`` without any line-by-line comparison.

        Not called by the other methods of this class.
        '''
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)


#
# Unfortunately the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#

# Token patterns, in group order: the comment alternative must come first
# because the substitution callback identifies comments by group 1.
_token_res = [
    r'//[^\r\n]*',              # comment
    r'"[^"\\]*(\\.[^"\\]*)*"',  # string
]

# Each pattern becomes one capturing group; the groups are OR-ed together.
# DOTALL lets an escaped character inside a string be a newline.
_tokens_re = re.compile(
    '|'.join('(%s)' % pattern for pattern in _token_res),
    re.DOTALL
)


def _strip_comment(mo):
    '''Substitution callback: erase a comment match, keep anything else.

    ``mo`` is a match object whose group 1 is set when a comment matched.
    '''
    return '' if mo.group(1) else mo.group(0)


def _strip_comments(data):
    '''Return ``data`` with (non-standard) ``//`` JSON comments removed.

    String literals are matched as tokens too, so a ``//`` inside a string
    is left alone.
    '''
    stripped = _tokens_re.sub(_strip_comment, data)
    return stripped


# Import-time self-check: a `//` comment is removed (its line terminator is
# kept), while a `//` inside a string literal is left untouched.
assert _strip_comments('''// a comment
"// a comment in a string
"''') == '''
"// a comment in a string
"'''


def load(stream, strip_images = True, strip_comments = True):
    '''Decode the JSON document read from ``stream``.

    stream         -- object with a ``read()`` method returning the text.
    strip_images   -- when true, decode objects through
                      ``strip_object_hook``.
    strip_comments -- when true, remove ``//`` comments before decoding.

    Decoding is non-strict, so control characters are accepted inside
    strings.  Returns the decoded Python value.
    '''
    object_hook = strip_object_hook if strip_images else None
    data = stream.read()
    if strip_comments:
        data = _strip_comments(data)
    return json.loads(data, strict=False, object_hook = object_hook)


def main():
    '''Command-line entry point: print the diff of two JSON files.

    Expects exactly two positional arguments, the reference file and the
    file to compare against it; ``--keep-images`` disables image stripping.
    A wrong argument count is reported through ``optparser.error``.
    '''
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    # Use the parsed positional arguments, not sys.argv: once an option
    # such as --keep-images is given, sys.argv[1] is no longer a file name.
    ref_path, src_path = args
    with open(ref_path, 'rt') as ref_stream:
        a = load(ref_stream, options.strip_images)
    with open(src_path, 'rt') as src_stream:
        b = load(src_stream, options.strip_images)

    differ = Differ()
    differ.visit(a, b)


# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()