#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function

import argparse
import bisect
import collections
import gzip
import json
import numbers
import os
import re
import subprocess
import sys

_SYMBOLS_PATH = os.path.abspath(os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    '..',
    'third_party',
    'symbols'))
sys.path.append(_SYMBOLS_PATH)
# pylint: disable=import-error
import symbols.elf_symbolizer as elf_symbolizer


# Relevant trace event phases from Chromium's
# src/base/trace_event/common/trace_event_common.h.
TRACE_EVENT_PHASE_METADATA = 'M'
TRACE_EVENT_PHASE_MEMORY_DUMP = 'v'


# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
# via 'name' group.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:app/[^/]+/lib/[^/]+/|app-lib/[^/]+/)(?P<name>.*\.so)')


# Subpath of output path where unstripped libraries are stored.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'


def FindInSystemPath(binary_name):
  """Returns the path of the first |binary_name| file found in PATH.

  Returns None if no directory listed in PATH contains such a file, or if the
  PATH environment variable is not set at all.
  """
  search_path = os.environ.get('PATH')
  if search_path is None:
    return None
  for directory in search_path.split(os.pathsep):
    binary_path = os.path.join(directory, binary_name)
    if os.path.isfile(binary_path):
      return binary_path
  return None


def IsSymbolizableFile(file_path):
  """Returns True if `file` describes |file_path| as an ELF or Mach-O binary.

  Raises subprocess.CalledProcessError if the `file` command fails.
  """
  # check_output() returns bytes on Python 3 (and str, which is bytes, on
  # Python 2), so the output is handled as bytes throughout.
  result = subprocess.check_output(['file', '-0', file_path])
  # The type description is whatever follows the first NUL byte of the output
  # (or the whole output if there is no NUL byte).
  type_string = result[result.find(b'\0') + 1:]
  return bool(re.match(br'\: (ELF|Mach-O) (32|64)-bit\b', type_string))


class ProcessMemoryMaps(object):
  """Represents 'process_mmaps' trace file entry."""

  class Region(object):
    """A mapped memory region.

    Regions are ordered and compared by start address only, and can be
    compared both with other regions and with plain integer addresses. The
    latter is what lets bisect search a sorted region list for an address.
    """

    def __init__(self, start_address, size, file_path):
      self._start_address = start_address
      self._size = size
      self._file_path = file_path

    @property
    def start_address(self):
      return self._start_address

    @property
    def end_address(self):
      """Address one past the last byte of the region."""
      return self._start_address + self._size

    @property
    def size(self):
      return self._size

    @property
    def file_path(self):
      return self._file_path

    def _OrderingKey(self, other):
      """Returns the address |other| is ordered by.

      |other| must be a Region or an integer; anything else raises TypeError.
      """
      if isinstance(other, type(self)):
        return other._start_address
      if isinstance(other, numbers.Integral):
        return other
      raise TypeError('Cannot compare with %s' % type(other))

    # Rich comparisons are spelled out (instead of __cmp__) because Python 3
    # ignores __cmp__. All six are needed: bisect evaluates 'address < region',
    # which resolves to the reflected Region.__gt__.
    def __eq__(self, other):
      return self._start_address == self._OrderingKey(other)

    def __ne__(self, other):
      return self._start_address != self._OrderingKey(other)

    def __lt__(self, other):
      return self._start_address < self._OrderingKey(other)

    def __le__(self, other):
      return self._start_address <= self._OrderingKey(other)

    def __gt__(self, other):
      return self._start_address > self._OrderingKey(other)

    def __ge__(self, other):
      return self._start_address >= self._OrderingKey(other)

    def __hash__(self):
      # Kept consistent with __eq__, which only looks at the start address.
      return hash(self._start_address)

    def __repr__(self):
      return 'Region(0x{:X} - 0x{:X}, {})'.format(
          self.start_address, self.end_address, self.file_path)

  def __init__(self, process_mmaps):
    """Parses 'process_mmaps' dictionary.

    Each entry of process_mmaps['vm_regions'] provides hexadecimal strings
    'sa' (start address) and 'sz' (size), and the mapped file path 'mf'.
    """
    regions = [
        self.Region(int(region_value['sa'], 16),
                    int(region_value['sz'], 16),
                    region_value['mf'])
        for region_value in process_mmaps['vm_regions']]
    regions.sort()

    # Copy regions without duplicates and check for overlaps. Regions compare
    # by start address, so a "duplicate" is a region that starts where the
    # previously kept one does.
    self._regions = []
    previous_region = None
    for region in regions:
      if previous_region is not None:
        if region == previous_region:
          continue
        assert region.start_address >= previous_region.end_address, \
            'Regions {} and {} overlap.'.format(previous_region, region)
      previous_region = region
      self._regions.append(region)

  @property
  def regions(self):
    """List of regions sorted by start address, without duplicates."""
    return self._regions

  def FindRegion(self, address):
    """Finds region containing |address|. Returns None if none found."""
    # The candidate is the last region starting at or before |address|.
    region_index = bisect.bisect_right(self._regions, address) - 1
    if region_index >= 0:
      region = self._regions[region_index]
      if region.start_address <= address < region.end_address:
        return region
    return None


class StackFrames(object):
  """Represents 'stackFrames' trace file entry."""

  class PCFrame(object):
    """A stack frame whose name is an unsymbolized program counter.

    Wraps the frame dictionary from the trace; assigning |name| writes through
    to that dictionary and marks the frame as modified.
    """

    def __init__(self, pc, frame):
      self._modified = False
      self._pc = pc
      self._frame = frame

    @property
    def modified(self):
      return self._modified

    @property
    def pc(self):
      return self._pc

    @property
    def name(self):
      return self._frame['name']

    @name.setter
    def name(self, value):
      self._modified = True
      self._frame['name'] = value

  def __init__(self, stack_frames):
    """Constructs object using 'stackFrames' dictionary.

    Only frames whose name has the 'pc:<hex address>' form are retained.
    """
    self._pc_frames = []
    for frame in stack_frames.values():
      pc_frame = self._ParsePCFrame(frame)
      if pc_frame:
        self._pc_frames.append(pc_frame)

  @property
  def pc_frames(self):
    return self._pc_frames

  @property
  def modified(self):
    """True if the name of any PC frame has been assigned."""
    return any(f.modified for f in self._pc_frames)

  _PC_TAG = 'pc:'

  @classmethod
  def _ParsePCFrame(cls, frame):
    """Returns a PCFrame for |frame|, or None if its name is not a PC."""
    name = frame['name']
    if not name.startswith(cls._PC_TAG):
      return None
    pc = int(name[len(cls._PC_TAG):], 16)
    return cls.PCFrame(pc, frame)


class Process(object):
  """Holds various bits of information about a process in a trace file."""

  def __init__(self, pid):
    self.pid = pid
    self.name = None
    self.mmaps = None
    self.stack_frames = None


def CollectProcesses(trace):
  """Parses trace dictionary and returns a list of all processes suitable for
  symbolization (which have both mmaps and stack_frames).

  |trace| is either a list of events or a dictionary holding that list under
  the 'traceEvents' key.
  """
  process_map = {}

  # Android traces produced via 'chrome://inspect/?tracing#devices' are
  # just list of events.
  events = trace if isinstance(trace, list) else trace['traceEvents']
  for event in events:
    name = event.get('name')
    if not name:
      continue

    pid = event['pid']
    process = process_map.get(pid)
    if process is None:
      process = Process(pid)
      process_map[pid] = process

    phase = event['ph']
    if phase == TRACE_EVENT_PHASE_METADATA:
      if name == 'process_name':
        process.name = event['args']['name']
      elif name == 'stackFrames':
        process.stack_frames = StackFrames(event['args']['stackFrames'])
    elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP:
      process_mmaps = event['args']['dumps'].get('process_mmaps')
      if process_mmaps:
        # TODO(dskiba): this parses all process_mmaps, but retains only the
        # last one. We need to parse only once (lazy parsing?).
        process.mmaps = ProcessMemoryMaps(process_mmaps)

  return [p for p in process_map.values() if p.mmaps and p.stack_frames]


class SymbolizableFile(object):
  """Holds file path, addresses to symbolize and stack frames to update.

  This class is a link between ELFSymbolizer and a trace file: it specifies
  what to symbolize (addresses) and what to update with the symbolization
  result (frames).
  """

  def __init__(self, file_path):
    self.path = file_path
    # Maps an address relative to the start of the mapped region to the list
    # of frames that refer to it.
    self.frames_by_address = collections.defaultdict(list)


def ResolveSymbolizableFiles(processes):
  """Resolves and groups PCs into list of SymbolizableFiles.

  As part of the grouping process, this function resolves PC from each stack
  frame to the corresponding mmap region. Stack frames that failed to resolve
  are symbolized with '<unresolved>'.
  """
  symfile_by_path = {}
  for process in processes:
    for frame in process.stack_frames.pc_frames:
      region = process.mmaps.FindRegion(frame.pc)
      if region is None:
        frame.name = '<unresolved>'
        continue

      symfile = symfile_by_path.get(region.file_path)
      if symfile is None:
        symfile = SymbolizableFile(region.file_path)
        symfile_by_path[symfile.path] = symfile

      relative_pc = frame.pc - region.start_address
      symfile.frames_by_address[relative_pc].append(frame)
  # Materialized so that callers get a real list on Python 3 as well.
  return list(symfile_by_path.values())


def SymbolizeFiles(symfiles, addr2line_path):
  """Symbolizes each file in the given list of SymbolizableFiles
  and updates stack frames with symbolization results.

  Frames of files that can't be symbolized are renamed to '<path>' (or
  '<unnamed>' when the path is empty). Returns True if at least one file was
  passed to the symbolizer.
  """
  print('Symbolizing...')

  def _SubPrintf(message, *args):
    print((' ' + message).format(*args))

  symbolized = False
  for symfile in symfiles:
    unsymbolized_name = '<{}>'.format(
        symfile.path if symfile.path else 'unnamed')

    problem = None
    if not os.path.isabs(symfile.path):
      problem = 'not a file'
    elif not os.path.isfile(symfile.path):
      problem = "file doesn't exist"
    elif not IsSymbolizableFile(symfile.path):
      problem = 'file is not symbolizable'
    if problem:
      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
                 len(symfile.frames_by_address),
                 symfile.path,
                 problem)
      for frames in symfile.frames_by_address.values():
        for frame in frames:
          frame.name = unsymbolized_name
      continue

    # |fallback_name| is bound as a default argument so that the callback
    # keeps this file's name no matter when it is invoked.
    def _SymbolizerCallback(sym_info, frames,
                            fallback_name=unsymbolized_name):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else fallback_name
      for frame in frames:
        frame.name = symbolized_name

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.path,
                                              addr2line_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    _SubPrintf('Symbolizing {} PCs from {}...',
               len(symfile.frames_by_address),
               symfile.path)

    for address, frames in symfile.frames_by_address.items():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()
    symbolized = True

  return symbolized


def HaveFilesFromAndroid(symfiles):
  """Returns True if the path of any of |symfiles| is an Android library path."""
  return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles)


def RemapAndroidFiles(symfiles, output_path):
  """Points Android library paths at unstripped libraries in |output_path|.

  Each of |symfiles| whose path matches ANDROID_PATH_MATCHER gets its path
  replaced in place with <output_path>/lib.unstripped/<library name>.
  """
  for symfile in symfiles:
    match = ANDROID_PATH_MATCHER.match(symfile.path)
    if match:
      name = match.group('name')
      symfile.path = os.path.join(
          output_path, ANDROID_UNSTRIPPED_SUBPATH, name)


# Suffix used for backup files.
BACKUP_FILE_TAG = '.BACKUP'


def main():
  """Symbolizes the trace file given on the command line, updating it in place.

  Unless --no-backup is given, the original file is first renamed to
  <file>.BACKUP. The file is left untouched when nothing was symbolized.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('file',
                      help='Trace file to symbolize (.json or .json.gz)')
  parser.add_argument('--no-backup',
                      dest='backup', default=True, action='store_false',
                      help="Don't create {} files".format(BACKUP_FILE_TAG))
  parser.add_argument('--output-directory',
                      help='The path to the build output directory, such ' +
                           'as out/Debug. Only needed for Android.')
  options = parser.parse_args()

  trace_file_path = options.file

  # On Python 3 json reads and writes text, so the trace file has to be opened
  # in text mode (with an explicit encoding). Python 2's gzip module only
  # supports binary mode, which is fine there because str is bytes.
  if sys.version_info[0] >= 3:
    gzip_mode_suffix = 't'
    open_kwargs = {'encoding': 'utf-8'}
  else:
    gzip_mode_suffix = 'b'
    open_kwargs = {}

  def _OpenTraceFile(mode):
    if trace_file_path.endswith('.gz'):
      return gzip.open(trace_file_path, mode + gzip_mode_suffix, **open_kwargs)
    return open(trace_file_path, mode + 't', **open_kwargs)

  addr2line_path = FindInSystemPath('addr2line')
  if addr2line_path is None:
    sys.exit("Can't symbolize - no addr2line in PATH.")

  print('Reading trace file...')
  with _OpenTraceFile('r') as trace_file:
    trace = json.load(trace_file)

  processes = CollectProcesses(trace)
  symfiles = ResolveSymbolizableFiles(processes)

  # Android trace files don't have any indication they are from Android.
  # So we're checking for Android-specific paths.
  if HaveFilesFromAndroid(symfiles):
    if not options.output_directory:
      parser.error('The trace file appears to be from Android. Please '
                   "specify output directory (e.g. 'out/Debug') to properly "
                   'symbolize it.')
    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))

  if SymbolizeFiles(symfiles, addr2line_path):
    if options.backup:
      backup_file_path = trace_file_path + BACKUP_FILE_TAG
      print('Backing up trace file to {}...'.format(backup_file_path))
      os.rename(trace_file_path, backup_file_path)

    print('Updating trace file...')
    with _OpenTraceFile('w') as trace_file:
      json.dump(trace, trace_file)
  else:
    print('No PCs symbolized - not updating trace file.')


if __name__ == '__main__':
  main()