#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# A script to accumulate values from the 'dmprof cat' command into CSV or else.
#
# Usage:
#   ./accumulate.py -f <format> -t <template-name> < input.json > output
#
# <format> is one of "csv", "json", and "tree". If "csv" or "json" is given,
# accumulate.py dumps a similar file to "dmprof csv|json". If "tree" is given,
# accumulate.py dumps a human-readable breakdown tree.
#
# <template-name> is a label in templates.json.
#
# An optional -e <exponent> (B, K or M; default M) selects the unit of the
# values written in the "csv" format.

import datetime
import json
import logging
import optparse
import sys

try:
    from lib.ordered_dict import OrderedDict
except ImportError:
    # The project-local module is not importable (e.g. the script is run
    # outside its source tree); use the standard-library class instead.
    # NOTE(review): presumably lib.ordered_dict is a drop-in equivalent of
    # collections.OrderedDict -- confirm.
    from collections import OrderedDict


LOGGER = logging.getLogger('dmprof-accumulate')

# Divisors applied to byte counts for the CSV output, keyed by the
# upper-cased exponent. Any exponent not listed here (i.e. 'B') leaves the
# byte count untouched.
_EXPONENT_DIVISORS = {
    'K': 1024.0,
    'M': 1024.0 * 1024.0,
}


def _warn(message):
    """Writes a one-line warning |message| to the standard error stream."""
    sys.stderr.write(message + '\n')


def _is_subtree(node):
    """Returns True if |node| is a nested category tree and not a leaf size."""
    return isinstance(node, (dict, OrderedDict))


def visit_in_template(template, snapshot, depth):
    """Visits all categories via a given template.

    This function is not used. It's a sample function to traverse a template.

    Args:
        template: A [world, breakdown, rules] triple.
        snapshot: A dict of worlds, indexed as
            snapshot[world]['breakdown'][breakdown].
        depth: The current nesting level; one space is printed per level.
    """
    world = template[0]
    breakdown = template[1]
    rules = template[2]

    for rule in snapshot[world]['breakdown'][breakdown]:
        print((' ' * depth) + rule)
        if rule in rules:
            visit_in_template(rules[rule], snapshot, depth + 1)


def accumulate(template, snapshot, units_dict, target_units):
    """Accumulates units in a JSON |snapshot| with applying a given |template|.

    Args:
        template: A template tree included in a dmprof cat JSON file. It is
            indexed as [world, breakdown, rules], where rules maps a rule
            name to the sub-template that breaks that rule down further.
        snapshot: A snapshot in a dmprof cat JSON file, indexed as
            snapshot[world]['breakdown'][breakdown][rule].
        units_dict: A dict of units in worlds, indexed as
            units_dict[world][unit_id] and giving the size of the unit.
        target_units: A set of unit ids which are a target of this
            accumulation. It is not modified.

    Returns:
        A (category_tree, total, remainder_units) tuple. category_tree is an
        ordered mapping from rule name to either a size (leaf) or a nested
        category tree; inside a nested tree the key None holds the size not
        accounted for by the sub-breakdown. total is the sum of the sizes
        matched by the visible rules of this breakdown. remainder_units is
        the set of target unit ids that no visible rule matched.
    """
    world = template[0]
    breakdown = template[1]
    rules = template[2]
    world_units = units_dict[world]

    remainder_units = target_units.copy()
    category_tree = OrderedDict()
    total = 0

    for rule, match in snapshot[world]['breakdown'][breakdown].items():
        if match.get('hidden'):
            continue

        matched_units = set(match['units']).intersection(target_units)
        subtotal = sum(world_units[unit_id] for unit_id in matched_units)
        total += subtotal
        remainder_units = remainder_units.difference(matched_units)

        if rule not in rules:
            # A category matched with |rule| is a leaf of the breakdown tree.
            # It is NOT broken down more.
            category_tree[rule] = subtotal
            continue

        # A category matched with |rule| is broken down more.
        subtemplate = rules[rule]
        subworld = subtemplate[0]
        subbreakdown = subtemplate[1]

        if subworld == world:
            # Break down in the same world: consider units.
            subtree, accounted_total, subremainder_units = accumulate(
                subtemplate, snapshot, units_dict, matched_units)
            subremainder_total = 0
            if subremainder_units:
                subremainder_total = sum(
                    world_units[unit_id] for unit_id in subremainder_units)
                subtree[None] = subremainder_total
            if subtotal != accounted_total + subremainder_total:
                _warn('WARNING: Sum of %s:%s is different from %s by %d bytes.'
                      % (subworld, subbreakdown, rule,
                         subtotal - (accounted_total + subremainder_total)))
        else:
            # Break down in a different world: consider only the total size.
            # Unit ids are not comparable across worlds, so every unit of the
            # other world is a target.
            subtree, accounted_total, _ = accumulate(
                subtemplate, snapshot, units_dict,
                set(units_dict[subworld].keys()))
            if subtotal >= accounted_total:
                subtree[None] = subtotal - accounted_total
            else:
                _warn('WARNING: Sum of %s:%s is larger than %s by %d bytes.'
                      % (subworld, subbreakdown, rule,
                         accounted_total - subtotal))
                _warn('WARNING: Assuming remainder of %s is 0.' % rule)
                subtree[None] = 0

        category_tree[rule] = subtree

    return category_tree, total, remainder_units


def flatten(category_tree, header=''):
    """Flattens a category tree into a flat list.

    Args:
        category_tree: A category tree as built by accumulate().
        header: The '>'-joined label of the enclosing categories, or '' at
            the top level.

    Returns:
        A list of (label, size) pairs, one per leaf, where label joins the
        rule names from the root with '>'. A falsy rule name (such as the
        None remainder key) is shown as 'remaining'.
    """
    result = []
    for rule, sub in category_tree.items():
        if not rule:
            rule = 'remaining'
        flattened_rule = header + '>' + rule if header else rule
        if _is_subtree(sub):
            result.extend(flatten(sub, flattened_rule))
        else:
            result.append((flattened_rule, sub))
    return result


def print_category_tree(category_tree, output, depth=0):
    """Prints a category tree in a human-readable format.

    Args:
        category_tree: A category tree as built by accumulate().
        output: A file-like object with a write() method.
        depth: The current nesting level.
    """
    # Each line starts with one space per level plus one separating space.
    indent = ' ' * depth
    for label in category_tree:
        node = category_tree[label]
        if _is_subtree(node):
            output.write('%s %s:\n' % (indent, label))
            print_category_tree(node, output, depth + 1)
        else:
            output.write('%s %s: %d\n' % (indent, label, node))


def flatten_all_category_trees(category_trees):
    """Flattens every category tree and collects all labels seen.

    Args:
        category_trees: A list of category trees, one per snapshot.

    Returns:
        A (flattened_labels, flattened_table) tuple: the set of all
        flattened labels, and a list with one ordered label-to-size mapping
        per input tree.
    """
    flattened_labels = set()
    flattened_table = []
    for category_tree in category_trees:
        flattened = OrderedDict()
        for label, subtotal in flatten(category_tree):
            flattened_labels.add(label)
            flattened[label] = subtotal
        flattened_table.append(flattened)
    return flattened_labels, flattened_table


def output_csv(output, category_trees, data, first_time, output_exponent):
    """Writes the accumulated sizes as CSV, one row per snapshot.

    Args:
        output: A file-like object with a write() method.
        category_trees: A list of category trees, one per snapshot.
        data: The parsed input; data['snapshots'][i]['time'] is read.
        first_time: The time subtracted from each snapshot time to fill the
            'second' column.
        output_exponent: 'B', 'K' or 'M' (case-insensitive), the unit of the
            written sizes.
    """
    flattened_labels, flattened_table = flatten_all_category_trees(
        category_trees)
    sorted_flattened_labels = sorted(flattened_labels)
    # None means "bytes": the size is written as it is, without a division.
    divisor = _EXPONENT_DIVISORS.get(output_exponent.upper())

    output.write(','.join(['second'] + sorted_flattened_labels) + '\n')

    for index, row in enumerate(flattened_table):
        values = [str(data['snapshots'][index]['time'] - first_time)]
        for label in sorted_flattened_labels:
            if label not in row:
                # The category does not exist in this snapshot.
                values.append('0')
            elif divisor is None:
                values.append(str(row[label]))
            else:
                values.append(str(row[label] / divisor))
        output.write(','.join(values) + '\n')


def output_json(output, category_trees, data, first_time, template_label):
    """Writes the accumulated sizes as a 'JSON_DEEP_2' document.

    Args:
        output: A file-like object with a write() method.
        category_trees: A list of category trees, one per snapshot.
        data: The parsed input; data['snapshots'][i]['time'] is read.
        first_time: The time subtracted from each snapshot time to fill the
            'second' field.
        template_label: The key under 'policies' that holds the result.
    """
    flattened_labels, flattened_table = flatten_all_category_trees(
        category_trees)

    json_snapshots = []
    for index, row in enumerate(flattened_table):
        snapshot_time = data['snapshots'][index]['time']
        row_with_meta = row.copy()
        row_with_meta['second'] = snapshot_time - first_time
        # fromtimestamp() without a tz argument yields local time.
        row_with_meta['dump_time'] = datetime.datetime.fromtimestamp(
            snapshot_time).strftime('%Y-%m-%d %H:%M:%S')
        json_snapshots.append(row_with_meta)

    json_root = {
        'version': 'JSON_DEEP_2',
        'policies': {
            template_label: {
                'legends': sorted(flattened_labels),
                'snapshots': json_snapshots,
            },
        },
    }
    json.dump(json_root, output, indent=2, sort_keys=True)


def output_tree(output, category_trees):
    """Writes every category tree as an indented, human-readable listing.

    Args:
        output: A file-like object with a write() method.
        category_trees: A list of category trees, one per snapshot.
    """
    for index, category_tree in enumerate(category_trees):
        output.write('< Snapshot #%d >\n' % index)
        print_category_tree(category_tree, output, 1)
        output.write('\n')


def do_main(cat_input, output, template_label, output_format, output_exponent):
    """Does the main work: accumulate for every snapshot and print a result.

    Args:
        cat_input: A file-like object whose read() returns the JSON text.
        output: A file-like object with a write() method.
        template_label: A key of data['templates']; if falsy,
            data['default_template'] is used instead.
        output_format: 'csv', 'json' or 'tree'.
        output_exponent: 'B', 'K' or 'M' (case-insensitive); only used by
            the 'csv' format.

    Raises:
        NotImplementedError: If |output_format| or |output_exponent| is not
            one of the values listed above.
    """
    if output_format not in ('csv', 'json', 'tree'):
        raise NotImplementedError(
            'The output format "%s" is not implemented.' % output_format)

    if output_exponent.upper() not in ('B', 'K', 'M'):
        raise NotImplementedError(
            'The exponent "%s" is not implemented.' % output_exponent)

    # OrderedDict keeps categories in the order they appear in the input.
    data = json.loads(cat_input.read(), object_pairs_hook=OrderedDict)

    templates = data['templates']
    if not template_label:
        template_label = data['default_template']
    if template_label not in templates:
        LOGGER.error("A template '%s' is not found.", template_label)
        return
    template = templates[template_label]

    category_trees = []
    first_time = None

    for snapshot in data['snapshots']:
        # Compare against None: a first snapshot taken at time 0 is a valid
        # baseline and must not be replaced by a later snapshot.
        if first_time is None:
            first_time = snapshot['time']

        units = {}
        for world_name in snapshot['worlds']:
            world_units = {}
            raw_units = snapshot['worlds'][world_name]['units']
            for unit_id, sizes in raw_units.items():
                # JSON object keys are strings; unit ids are used as ints.
                world_units[int(unit_id)] = sizes[0]
            units[world_name] = world_units

        category_tree, _, _ = accumulate(
            template, snapshot['worlds'], units,
            set(units[template[0]].keys()))
        category_trees.append(category_tree)

    if output_format == 'csv':
        output_csv(output, category_trees, data, first_time, output_exponent)
    elif output_format == 'json':
        output_json(output, category_trees, data, first_time, template_label)
    elif output_format == 'tree':
        output_tree(output, category_trees)


def main():
    """Parses the command line and converts standard input to standard output."""
    LOGGER.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    handler.setFormatter(formatter)
    LOGGER.addHandler(handler)

    parser = optparse.OptionParser()
    parser.add_option('-t', '--template', dest='template',
                      metavar='TEMPLATE',
                      help='Apply TEMPLATE to list up.')
    parser.add_option('-f', '--format', dest='format', default='csv',
                      help='Specify the output format: csv, json or tree.')
    parser.add_option('-e', '--exponent', dest='exponent', default='M',
                      help='Specify B (bytes), K (kilobytes) or M (megabytes).')

    options, _ = parser.parse_args(sys.argv)
    do_main(sys.stdin, sys.stdout,
            options.template, options.format, options.exponent)


if __name__ == '__main__':
    sys.exit(main())