#!/usr/bin/env python # Copyright 2014 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. import argparse, os, sys, json, subprocess, pickle, StringIO parser = argparse.ArgumentParser( description = "Process the Blink points-to graph generated by the Blink GC plugin.") parser.add_argument( '-', dest='use_stdin', action='store_true', help='Read JSON graph files from stdin') parser.add_argument( '-c', '--detect-cycles', action='store_true', help='Detect cycles containing GC roots') parser.add_argument( '-s', '--print-stats', action='store_true', help='Statistics about ref-counted and traced objects') parser.add_argument( '-v', '--verbose', action='store_true', help='Verbose output') parser.add_argument( '--ignore-cycles', default=None, metavar='FILE', help='File with cycles to ignore') parser.add_argument( '--ignore-classes', nargs='*', default=[], metavar='CLASS', help='Classes to ignore when detecting cycles') parser.add_argument( '--pickle-graph', default=None, metavar='FILE', help='File to read/save the graph from/to') parser.add_argument( 'files', metavar='FILE_OR_DIR', nargs='*', default=[], help='JSON graph files or directories containing them') # Command line args after parsing. args = None # Map from node labels to nodes. graph = {} # Set of root nodes. roots = [] # List of cycles to ignore. ignored_cycles = [] # Global flag to determine exit code. global_reported_error = False def set_reported_error(value): global global_reported_error global_reported_error = value def reported_error(): return global_reported_error def log(msg): if args.verbose: print msg global_inc_copy = 0 def inc_copy(): global global_inc_copy global_inc_copy += 1 def get_node(name): return graph.setdefault(name, Node(name)) ptr_types = ('raw', 'ref', 'mem') def inc_ptr(dst, ptr): if ptr in ptr_types: node = graph.get(dst) if not node: return node.counts[ptr] += 1 def add_counts(s1, s2): for (k, v) in s2.iteritems(): s1[k] += s2[k] # Representation of graph nodes. Basically a map of directed edges. class Node: def __init__(self, name): self.name = name self.edges = {} self.reset() def __repr__(self): return "%s(%s) %s" % (self.name, self.visited, self.edges) def update_node(self, decl): # Currently we don't track any node info besides its edges. pass def update_edge(self, e): new_edge = Edge(**e) edge = self.edges.get(new_edge.key) if edge: # If an edge exist, its kind is the strongest of the two. edge.kind = max(edge.kind, new_edge.kind) else: self.edges[new_edge.key] = new_edge def super_edges(self): return [ e for e in self.edges.itervalues() if e.is_super() ] def subclass_edges(self): return [ e for e in self.edges.itervalues() if e.is_subclass() ] def reset(self): self.cost = sys.maxint self.visited = False self.path = None self.counts = {} for ptr in ptr_types: self.counts[ptr] = 0 def update_counts(self): for e in self.edges.itervalues(): inc_ptr(e.dst, e.ptr) # Representation of directed graph edges. class Edge: def __init__(self, **decl): self.src = decl['src'] self.dst = decl['dst'] self.lbl = decl['lbl'] self.ptr = decl['ptr'] self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root self.loc = decl['loc'] # The label does not uniquely determine an edge from a node. We # define the semi-unique key to be the concatenation of the # label and dst name. This is sufficient to track the strongest # edge to a particular type. For example, if the field A::m_f # has type HashMap<WeakMember<B>, Member<B>> we will have a # strong edge with key m_f#B from A to B. self.key = '%s#%s' % (self.lbl, self.dst) def __repr__(self): return '%s (%s) => %s' % (self.src, self.lbl, self.dst) def is_root(self): return self.kind == 2 def is_weak(self): return self.kind == 0 def keeps_alive(self): return self.kind > 0 def is_subclass(self): return self.lbl.startswith('<subclass>') def is_super(self): return self.lbl.startswith('<super>') def parse_file(filename): obj = json.load(open(filename)) return obj def build_graphs_in_dir(dirname): # TODO: Use plateform independent code, eg, os.walk files = subprocess.check_output( ['find', dirname, '-name', '*.graph.json']).split('\n') log("Found %d files" % len(files)) for f in files: f.strip() if len(f) < 1: continue build_graph(f) def build_graph(filename): for decl in parse_file(filename): if decl.has_key('name'): # Add/update a node entry name = decl['name'] node = get_node(name) node.update_node(decl) else: # Add/update an edge entry name = decl['src'] node = get_node(name) node.update_edge(decl) # Copy all non-weak edges from super classes to their subclasses. # This causes all fields of a super to be considered fields of a # derived class without tranitively relating derived classes with # each other. For example, if B <: A, C <: A, and for some D, D => B, # we don't want that to entail that D => C. def copy_super_edges(edge): if edge.is_weak() or not edge.is_super(): return inc_copy() # Make the super-class edge weak (prohibits processing twice). edge.kind = 0 # If the super class is not in our graph exit early. super_node = graph.get(edge.dst) if super_node is None: return # Recursively copy all super-class edges. for e in super_node.super_edges(): copy_super_edges(e) # Copy strong super-class edges (ignoring sub-class edges) to the sub class. sub_node = graph[edge.src] for e in super_node.edges.itervalues(): if e.keeps_alive() and not e.is_subclass(): new_edge = Edge( src = sub_node.name, dst = e.dst, lbl = '%s <: %s' % (super_node.name, e.lbl), ptr = e.ptr, kind = e.kind, loc = e.loc, ) sub_node.edges[new_edge.key] = new_edge # Add a strong sub-class edge. sub_edge = Edge( src = super_node.name, dst = sub_node.name, lbl = '<subclass>', ptr = edge.ptr, kind = 1, loc = edge.loc, ) super_node.edges[sub_edge.key] = sub_edge def complete_graph(): for node in graph.itervalues(): for edge in node.super_edges(): copy_super_edges(edge) for edge in node.edges.itervalues(): if edge.is_root(): roots.append(edge) log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy) def reset_graph(): for n in graph.itervalues(): n.reset() def shortest_path(start, end): start.cost = 0 minlist = [start] while len(minlist) > 0: minlist.sort(key=lambda n: -n.cost) current = minlist.pop() current.visited = True if current == end or current.cost >= end.cost + 1: return for e in current.edges.itervalues(): if not e.keeps_alive(): continue dst = graph.get(e.dst) if dst is None or dst.visited: continue if current.cost < dst.cost: dst.cost = current.cost + 1 dst.path = e minlist.append(dst) def detect_cycles(): for root_edge in roots: reset_graph() # Mark ignored classes as already visited for ignore in args.ignore_classes: name = ignore.find("::") > 0 and ignore or ("blink::" + ignore) node = graph.get(name) if node: node.visited = True src = graph[root_edge.src] dst = graph.get(root_edge.dst) if src.visited: continue if root_edge.dst == "WTF::String": continue if dst is None: print "\nPersistent root to incomplete destination object:" print root_edge set_reported_error(True) continue # Find the shortest path from the root target (dst) to its host (src) shortest_path(dst, src) if src.cost < sys.maxint: report_cycle(root_edge) def is_ignored_cycle(cycle): for block in ignored_cycles: if block_match(cycle, block): return True def block_match(b1, b2): if len(b1) != len(b2): return False for (l1, l2) in zip(b1, b2): if l1 != l2: return False return True def report_cycle(root_edge): dst = graph[root_edge.dst] path = [] edge = root_edge dst.path = None while edge: path.append(edge) edge = graph[edge.src].path path.append(root_edge) path.reverse() # Find the max loc length for pretty printing. max_loc = 0 for p in path: if len(p.loc) > max_loc: max_loc = len(p.loc) out = StringIO.StringIO() for p in path[:-1]: print >>out, (p.loc + ':').ljust(max_loc + 1), p sout = out.getvalue() if not is_ignored_cycle(sout): print "\nFound a potentially leaking cycle starting from a GC root:\n", sout set_reported_error(True) def load_graph(): global graph global roots log("Reading graph from pickled file: " + args.pickle_graph) dump = pickle.load(open(args.pickle_graph, 'rb')) graph = dump[0] roots = dump[1] def save_graph(): log("Saving graph to pickle file: " + args.pickle_graph) dump = (graph, roots) pickle.dump(dump, open(args.pickle_graph, 'wb')) def read_ignored_cycles(): global ignored_cycles if not args.ignore_cycles: return log("Reading ignored cycles from file: " + args.ignore_cycles) block = [] for l in open(args.ignore_cycles): line = l.strip() if not line or line.startswith('Found'): if len(block) > 0: ignored_cycles.append(block) block = [] else: block += l if len(block) > 0: ignored_cycles.append(block) gc_bases = ( 'blink::GarbageCollected', 'blink::GarbageCollectedFinalized', 'blink::GarbageCollectedMixin', ) ref_bases = ( 'WTF::RefCounted', 'WTF::ThreadSafeRefCounted', ) gcref_bases = ( 'blink::RefCountedGarbageCollected', 'blink::ThreadSafeRefCountedGarbageCollected', ) ref_mixins = ( 'blink::EventTarget', 'blink::EventTargetWithInlineData', 'blink::ActiveDOMObject', ) def print_stats(): gcref_managed = [] ref_managed = [] gc_managed = [] hierarchies = [] for node in graph.itervalues(): node.update_counts() for sup in node.super_edges(): if sup.dst in gcref_bases: gcref_managed.append(node) elif sup.dst in ref_bases: ref_managed.append(node) elif sup.dst in gc_bases: gc_managed.append(node) groups = [("GC manged ", gc_managed), ("ref counted ", ref_managed), ("in transition", gcref_managed)] total = sum([len(g) for (s,g) in groups]) for (s, g) in groups: percent = len(g) * 100 / total print "%2d%% is %s (%d hierarchies)" % (percent, s, len(g)) for base in gcref_managed: stats = dict({ 'classes': 0, 'ref-mixins': 0 }) for ptr in ptr_types: stats[ptr] = 0 hierarchy_stats(base, stats) hierarchies.append((base, stats)) print "\nHierarchies in transition (RefCountedGarbageCollected):" hierarchies.sort(key=lambda (n,s): -s['classes']) for (node, stats) in hierarchies: total = stats['mem'] + stats['ref'] + stats['raw'] print ( "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" % (stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ", total == 0 and 100 or stats['mem'] * 100 / total, node.name.replace('blink::', ''), stats['classes'], stats['mem'], stats['ref'], stats['raw'], stats['ref-mixins'], )) def hierarchy_stats(node, stats): if not node: return stats['classes'] += 1 add_counts(stats, node.counts) for edge in node.super_edges(): if edge.dst in ref_mixins: stats['ref-mixins'] += 1 for edge in node.subclass_edges(): hierarchy_stats(graph.get(edge.dst), stats) def main(): global args args = parser.parse_args() if not (args.detect_cycles or args.print_stats): print "Please select an operation to perform (eg, -c to detect cycles)" parser.print_help() return 1 if args.pickle_graph and os.path.isfile(args.pickle_graph): load_graph() else: if args.use_stdin: log("Reading files from stdin") for f in sys.stdin: build_graph(f.strip()) else: log("Reading files and directories from command line") if len(args.files) == 0: print "Please provide files or directores for building the graph" parser.print_help() return 1 for f in args.files: if os.path.isdir(f): log("Building graph from files in directory: " + f) build_graphs_in_dir(f) else: log("Building graph from file: " + f) build_graph(f) log("Completing graph construction (%d graph nodes)" % len(graph)) complete_graph() if args.pickle_graph: save_graph() if args.detect_cycles: read_ignored_cycles() log("Detecting cycles containg GC roots") detect_cycles() if args.print_stats: log("Printing statistics") print_stats() if reported_error(): return 1 return 0 if __name__ == '__main__': sys.exit(main())