普通文本  |  381行  |  16.05 KB

#!/usr/bin/env python3
#  Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from concurrent import futures

import itertools
import sys
import re
import pygraphviz as gv
import ply.lex as lex
import ply.yacc as yacc
from functools import lru_cache as memoize

diagnostic_header_pattern = re.compile('[^ ]+\.[^ ]+:[0-9]+:[0-9]+: ([^ ]*): (.*)')
in_file_included_from_pattern = re.compile('In file included from .*:')
in_instantiation_of_template_pattern = re.compile('in instantiation of (.*) (?:requested|required) here')
static_warning_marked_deprecated_here_pattern = re.compile('\'static_warning\' has been explicitly marked deprecated here')

class Diagnostic:
    def __init__(self, kind, message):
        self.kind = kind
        self.message = message
        self.template_instantiation_trace = []

tokens = (
    'LPAREN',
    'RPAREN',
    'LBRACKET',
    'RBRACKET',
    'LBRACE',
    'RBRACE',
    'LESS_THAN',
    'GREATER_THAN',
    'DOUBLE_COLON',
    'COMMA',
    'IDENTIFIER',
    'ASTERISK',
    'AMPERSAND',
)

t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE =  r'}'
t_RBRACE = r'{'
t_LESS_THAN = r'<'
t_GREATER_THAN = r'>'
t_DOUBLE_COLON = r'::'
t_COMMA = r','
t_ASTERISK = r'\*'
t_AMPERSAND = r'&'
# We conflate numbers as identifiers too, we don't care about the difference.
t_IDENTIFIER = r'[a-zA-Z0-9_]+'

t_ignore = ' \t'

def t_error(t):
    raise Exception("Illegal character '%s' followed by %s" % (t.value[0], t.value[1:]))

class LayoutNeedsMultipleLinesException(Exception):
    pass

class AstNode:
    def __str__(self):
        return ''.join(self)

class TerminalAstNode(AstNode):
    def __init__(self, s):
        self.s = s
        self.is_multiline = (s == '\n')
        # last_line_length is the string length if s is not a multiline string.
        # For multiline strings ending in a newline, this is 0.
        if self.is_multiline:
            self.first_line_length = 0
            self.last_line_length = 0
            self.max_line_length = 0
        else:
            # This never happens ATM, so we don't handle it.
            assert not '\n' in s

            self.first_line_length = len(s)
            self.last_line_length = len(s)
            self.max_line_length = len(s)

    def __iter__(self):
        return iter((self.s,))

class NonTerminalAstNode(AstNode):
    def __init__(self, children_ast_nodes):
        self.children_ast_nodes = children_ast_nodes
        first_line_length = 0
        last_line_length = 0
        is_multiline = False
        max_line_length = 0
        for node in children_ast_nodes:
            if node.is_multiline:
                last_line_length = node.last_line_length
                max_line_length = max(max_line_length, last_line_length + node.first_line_length, node.max_line_length)
                is_multiline = True
            else:
                last_line_length += node.last_line_length
                max_line_length = max(max_line_length, last_line_length)

        self.first_line_length = first_line_length
        self.last_line_length = last_line_length
        self.is_multiline = is_multiline
        self.max_line_length = max_line_length

    def __iter__(self):
        return itertools.chain(*self.children_ast_nodes)

max_line_length = 80
# Size of an indent in spaces.
single_indent_length = 4

class TerminalNodeFactory():
    def __init__(self, s):
        self.s = s

    def __call__(self, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
        return TerminalAstNode(self.s)

# 'balanced_string' nodes evaluate to a function (or a callable object) taking these parameters:
#     current_indent (integer): the indentation in the current line (spaces only)
#     current_line_length (integer): the number of preceding characters in the current line (>=current_indent)
#     inside_meta_type (boolean): whether we're inside a Type<...>
#     last_token_was_type_wrapper (boolean): whether the immediately-preceding token was the identifier 'Type'
#  and returning an AstNode
# 'comma_separated_balanced_string' nodes evaluate to a tuple of such functions

def p_comma_separated_balanced_string_empty(p):
    'comma_separated_balanced_string : '
    p[0] = tuple()

def p_comma_separated_balanced_string_not_empty(p):
    'comma_separated_balanced_string : COMMA balanced_string comma_separated_balanced_string'
    p[0] = (
        p[2],
        *(p[3])
    )

def p_optional_balanced_string_empty(p):
    'optional_balanced_string : '
    p[0] = TerminalNodeFactory('')

def p_optional_balanced_string_not_empty(p):
    'optional_balanced_string : balanced_string'
    p[0] = p[1]

class BalancedStringTerminalNodeFactory():
    def __init__(self, first_token, node_factory):
        self.first_token = first_token
        self.node_factory = node_factory

    def __call__(self, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
        terminal_node = TerminalAstNode(self.first_token)
        non_terminal_node = self.node_factory(
            current_indent,
            current_line_length + len(self.first_token),
            inside_meta_type,
            self.first_token == 'Type',
            accept_single_line_only)
        if non_terminal_node is None:
            return None
        return NonTerminalAstNode((terminal_node, non_terminal_node))

def p_balanced_string_terminal(p):
    '''balanced_string : DOUBLE_COLON balanced_string
                       | IDENTIFIER optional_balanced_string
                       | ASTERISK optional_balanced_string
                       | AMPERSAND optional_balanced_string
    '''
    first_token = p[1]
    node_factory = p[2]

    p[0] = BalancedStringTerminalNodeFactory(first_token, node_factory)

def create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, accept_single_line_only):
    nodes = []
    for node_factory, current_indent, inside_meta_type in node_factory_inside_meta_type_pairs:
        node = node_factory(current_indent, current_line_length, inside_meta_type, False, accept_single_line_only)
        if node is None:
            return None
        nodes.append(node)
        if node.is_multiline:
            if accept_single_line_only:
                raise Exception('Unexpected multiline, due to factory: ' + node_factory)
            # Note that due to the way we break lines, the last line will have the same indent as the first.
            # So we don't need to update current_indent here.
            current_line_length = node.last_line_length
        else:
            current_line_length += node.last_line_length
    return NonTerminalAstNode(nodes)

def compute_layout(left_token, intermediate_node_factories, right_token, rhs_node_factory, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
    # We lay out the result in one of two ways:
    #
    # $previousIndent $previousContent LPAREN x1, x2, x3 RPAREN balanced_string
    #
    # Or:
    #
    # $previousIndent $previousContent LPAREN
    # $previousIndent $indent x1 ,
    # $previousIndent $indent x2 ,
    # $previousIndent $indent x3 RPAREN balanced_string

    entering_meta_type = last_token_was_type_wrapper

    # First, we try to use the first format if possible
    node_factory_inside_meta_type_pairs = [
        (TerminalNodeFactory(left_token), current_indent, inside_meta_type),
        *((intermediate_node_factory, current_indent, (inside_meta_type or entering_meta_type))
          for intermediate_node_factory in intermediate_node_factories),
        (TerminalNodeFactory(right_token), current_indent, inside_meta_type),
        (rhs_node_factory, current_indent, inside_meta_type),
    ]
    node_with_single_line_layout = create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, True)
    if node_with_single_line_layout is not None and node_with_single_line_layout.max_line_length <= max_line_length:
        assert not node_with_single_line_layout.is_multiline
        return node_with_single_line_layout

    if accept_single_line_only:
        return None

    # The result exceeds the line length, let's switch to the second one.
    node_factory_inside_meta_type_pairs = [
        (TerminalNodeFactory(left_token),
         current_indent,
         inside_meta_type)
    ]
    new_indent_length = current_indent + single_indent_length
    comma_node_factory_inside_meta_type_pair = (TerminalNodeFactory(','), current_indent, inside_meta_type or entering_meta_type)
    newline_node_factory_inside_meta_type_pair = (TerminalNodeFactory('\n'), current_indent, inside_meta_type or entering_meta_type)
    indent_node_factory_inside_meta_type_pair = (TerminalNodeFactory(' ' * new_indent_length), current_indent, inside_meta_type or entering_meta_type)
    for inner_node_factory in intermediate_node_factories:
        node_factory_inside_meta_type_pairs.append(newline_node_factory_inside_meta_type_pair)
        node_factory_inside_meta_type_pairs.append(indent_node_factory_inside_meta_type_pair)
        node_factory_inside_meta_type_pairs.append((inner_node_factory, new_indent_length, inside_meta_type or entering_meta_type))
        node_factory_inside_meta_type_pairs.append(comma_node_factory_inside_meta_type_pair)
    node_factory_inside_meta_type_pairs.pop()
    node_factory_inside_meta_type_pairs.append((TerminalNodeFactory(right_token), current_indent, inside_meta_type))
    node_factory_inside_meta_type_pairs.append((rhs_node_factory, current_indent, inside_meta_type))
    return create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, accept_single_line_only)


def p_balanced_string_with_balanced_token_no_comma_separated_elems(p):
    '''balanced_string : LPAREN    RPAREN       optional_balanced_string
                       | LBRACKET  RBRACKET     optional_balanced_string
                       | LBRACE    RBRACE       optional_balanced_string
                       | LESS_THAN GREATER_THAN optional_balanced_string
    '''
    p_1 = p[1]
    p_2 = p[2]
    p_3 = p[3]
    def result(current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
        return compute_layout(p_1, [], p_2, p_3, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only)

    p[0] = result

def p_balanced_string_with_balanced_token_some_comma_separated_elems(p):
    '''balanced_string : LPAREN    balanced_string comma_separated_balanced_string RPAREN       optional_balanced_string
                       | LBRACKET  balanced_string comma_separated_balanced_string RBRACKET     optional_balanced_string
                       | LBRACE    balanced_string comma_separated_balanced_string RBRACE       optional_balanced_string
                       | LESS_THAN balanced_string comma_separated_balanced_string GREATER_THAN optional_balanced_string
    '''
    p_1 = p[1]
    p_2 = p[2]
    p_3 = p[3]
    p_4 = p[4]
    p_5 = p[5]
    def result(current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only):
        if not inside_meta_type:
            if p_1 == '(' and p_4 == ')':
                if len(p_3) == 0:
                    if isinstance(p_2, BalancedStringTerminalNodeFactory) and p_2.first_token == '*':
                        if isinstance(p_2.node_factory, TerminalNodeFactory) and p_2.node_factory.s == '':
                            # Special case: we're not inside a Type<...> and we've encountered a '(*)'.
                            # Discard it and just print the rhs.
                            return p_5(current_indent, current_line_length, inside_meta_type, False, accept_single_line_only)

        return compute_layout(p_1, (p_2, *(p_3)), p_4, p_5, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only)

    p[0] = result

def p_error(p):
    raise Exception("Syntax error when parsing meta type: ", p[:])

lexer = lex.lex()
parser = yacc.yacc(start='balanced_string')

strings_to_remove = re.compile(r'template class |template type alias |function template specialization |member class |member function |default argument for |fruit::impl::meta::|fruit::impl::|fruit::')

def do_simplify_template_trace_element(element):
    element, _ = re.subn(strings_to_remove, '', element)
    element = element.strip()
    if element[0] != '\'' or element[-1] != '\'':
        raise Exception('Expected single quotes in: ' + element)
    element = element[1:-1]
    if element.startswith('DoEval<') and element[-1] == '>':
        element = element[7:-1]
    result = ''.join(parser.parse(element, lexer)(0, 0, False, False, False))
    return result

@memoize(maxsize=1000)
def simplify_template_trace_element(element, executor):
    return executor.submit(do_simplify_template_trace_element, element)

def to_dot_left_justified_string(s):
    return '\\l'.join(s.splitlines() + [''])

def main():
    diagnostics = []

    with futures.ProcessPoolExecutor() as executor:
        lines = sys.stdin.readlines()
        for line_number, line in enumerate(lines):
            # Remove the newline
            line = line[:-1]

            matches = in_file_included_from_pattern.search(line)
            if matches:
                continue

            matches = diagnostic_header_pattern.search(line)
            if matches:
                diagnostic_kind, diagnostic_message = matches.groups()
                if diagnostic_kind == 'error':
                    diagnostics.append(Diagnostic(diagnostic_kind, diagnostic_message))
                    print('Processing diagnostic. (%s / %s) ' % (line_number, len(lines)), file=sys.stderr)
                elif diagnostic_kind == 'note':
                    matches = in_instantiation_of_template_pattern.search(diagnostic_message)
                    if matches:
                        if not diagnostics:
                            raise Exception('Found template instantiation note before any error diagnostic: %s' % diagnostic_message)
                        if 'in instantiation of template type alias' in line:
                            pass
                        else:
                            group = matches.groups()[0]
                            trace_element_future = simplify_template_trace_element(group, executor)
                            diagnostics[-1].template_instantiation_trace.append(trace_element_future)
                        continue

                    matches = static_warning_marked_deprecated_here_pattern.search(diagnostic_message)
                    if matches:
                        continue

                    raise Exception('Found unknown note: %s' % diagnostic_message)

        call_graph = {}
        graph = gv.AGraph(directed=True)

        for diagnostic_index, diagnostic in enumerate(diagnostics):
            if diagnostic_index % 10 == 0:
                print('Constructing dep graph: iteration %s/%s' % (diagnostic_index, len(diagnostics)), file=sys.stderr)

            template_instantiation_trace = [trace_element_future.result() for trace_element_future in diagnostic.template_instantiation_trace]
            for called, caller in zip(template_instantiation_trace[1:], template_instantiation_trace[2:]):
                if called in call_graph and call_graph[called] != caller:
                    # Avoid this edge, so that the resulting graph is a tree
                    continue
                graph.add_edge(to_dot_left_justified_string(caller), to_dot_left_justified_string(called))
                call_graph[called] = caller

        print(graph)

if __name__ == '__main__':
    main()