#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import os
import re
import sys
import subprocess
# Cache of symbolizer objects keyed by the binary path taken from a stack
# frame; filled in lazily by symbolize_line().
symbolizers = {}
# NOTE(review): not referenced anywhere in the visible part of this file.
filetypes = {}
# NOTE(review): not referenced anywhere in the visible part of this file.
vmaddrs = {}
# When true, the symbolizers echo the external commands they start and the
# lines they process.
DEBUG = False
def fix_filename(file_name):
    """Shorten a "file:line" location for display.

    Every command-line argument is treated as a regular expression; the text
    up to and including its last match is cut from file_name.  Afterwards,
    locations inside asan_*.cc files collapse to "_asan_rtl_" and
    crtstuff.c:0 becomes "???:0".

    Returns the rewritten string.
    """
    for prefix_pattern in sys.argv[1:]:
        file_name = re.sub(".*" + prefix_pattern, "", file_name)
    # Fixed rewrites, applied in this order after the user-supplied cuts.
    rewrites = (
        (".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_"),
        (".*crtstuff.c:0", "???:0"),
    )
    for pattern, replacement in rewrites:
        file_name = re.sub(pattern, replacement, file_name)
    return file_name
class Symbolizer(object):
    """Common base class of the symbolizers defined in this file."""

    def __init__(self):
        """Nothing to initialize; subclasses call this through super()."""
class LinuxSymbolizer(Symbolizer):
    """Symbolizer that queries one `addr2line -f -e <binary>` process.

    The process is started in the constructor and reused for every
    symbolize() call.
    """

    def __init__(self, binary):
        super(LinuxSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start addr2line for self.binary and return the Popen object."""
        argv = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(argv))
        return subprocess.Popen(argv,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

    def symbolize(self, prefix, addr, offset):
        """Return "<prefix><addr> in <function> <file:line>" for offset.

        offset is written to addr2line, which answers with two lines: the
        function name and the location.  If talking to the process fails
        for any reason, both fields are left empty rather than raising.
        """
        try:
            self.pipe.stdin.write("%s\n" % offset)
            answer = (self.pipe.stdout.readline().rstrip(),
                      self.pipe.stdout.readline().rstrip())
        except Exception:
            answer = ("", "")
        function_name, location = answer
        return "%s%s in %s %s" % (prefix, addr, function_name,
                                  fix_filename(location))
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that shells out to `otool` and `atos`.

    A fresh atos process is started for every symbolize() call.  The __TEXT
    segment vmaddr reported by `otool -l` is added to each offset before it
    is handed to atos.
    """

    def __init__(self, addr, binary):
        """
        addr:   an address string from the stack trace; only its length is
                used, to guess the architecture.
        binary: path of the binary the frames belong to.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000

        The value is computed on the first call and cached in self.vmaddr.
        Returns 0 when no __TEXT vmaddr line is found.
        """
        # Compare against None: 0 is a perfectly valid cached vmaddr and must
        # not trigger another otool run.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ["otool", "-l", self.binary]
        pipe = subprocess.Popen(cmdline,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        # communicate() reads all output, closes our pipe ends and waits for
        # the child, so no otool process or descriptor is left behind.
        output = pipe.communicate()[0]
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                vmaddr = int(line.split()[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Write offset (a hex string) plus the binary's vmaddr to atos."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))

    def open_atos(self):
        """Start a new atos process for self.binary and store it in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

    def symbolize(self, prefix, addr, offset):
        """Return the symbolized frame string for offset.

        When the atos answer has the expected shape the result is
        "<prefix><addr> in <function> <file:line>"; otherwise the raw atos
        line is appended after " in ".
        """
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # communicate() closes stdin once the address has been written, reads
        # stdout and stderr to the end and waits for atos to exit.  This
        # avoids leaking one process (and three pipes) per frame, and atos
        # cannot stall on an unread stderr pipe.
        atos_output = self.pipe.communicate()[0]
        # Only the first line of the answer is of interest.
        atos_line = atos_output.split("\n", 1)[0].rstrip()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print("atos_line:  " + atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r"\(.*?\)", "", function_name)
            file_name = fix_filename(match.group(3))
            return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
        else:
            return "%s%s in %s" % (prefix, addr, atos_line)
# Chain two symbolizers so that the second one is called if the first fails.
class ChainSymbolizer(Symbolizer):
    """Try symbolizer1 first; fall back to symbolizer2 when it returns None."""

    def __init__(self, symbolizer1, symbolizer2):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer1, self.symbolizer2 = symbolizer1, symbolizer2

    def symbolize(self, prefix, addr, offset):
        """Return the first symbolizer's answer, or the second's if it is None."""
        primary = self.symbolizer1.symbolize(prefix, addr, offset)
        if primary is not None:
            return primary
        return self.symbolizer2.symbolize(prefix, addr, offset)
def BreakpadSymbolizerFactory(addr, binary):
    """Return a BreakpadSymbolizer for binary, or None if unavailable.

    The symbol file is looked up at binary + $BREAKPAD_SUFFIX.  None is
    returned when the environment variable is unset or empty, or when that
    file does not exist.  addr is accepted but not used.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if not suffix:
        return None
    symbol_path = binary + suffix
    if not os.access(symbol_path, os.F_OK):
        return None
    return BreakpadSymbolizer(symbol_path)
def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform symbolizer for system.

    'Linux' gives a LinuxSymbolizer and 'Darwin' a DarwinSymbolizer; any
    other value gives None.
    """
    if system == 'Linux':
        return LinuxSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers lookups from a Breakpad text symbol file.

    The file is read and parsed once in the constructor.  symbolize() then
    works purely in memory and returns None for offsets that no line record
    covers.
    """

    def __init__(self, filename):
        """Load and parse the symbol file at filename."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Close the file deterministically instead of leaving it to the
        # garbage collector.
        sym_file = open(filename)
        try:
            lines = sym_file.readlines()
        finally:
            sym_file.close()
        # File names, indexed by the file number used in line records.
        self.files = []
        # Function/public symbol address -> symbol name.
        self.symbols = {}
        # Sorted start addresses of all line records (for bisect).
        self.address_list = []
        # Line record start address -> (symbol address, size, line, file no).
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Parse the records following the MODULE line into the lookup tables.

        FILE, PUBLIC and FUNC records are recorded; CFI and STACK records
        are ignored; blank lines are skipped.  Any other line is treated as
        a line record "<addr> <size> <line> <file no>" that belongs to the
        most recent FUNC record.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # Direct dict membership test; a name already registered
                # for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line_no) for addr, or None.

        The candidate is the line record with the greatest start address
        that is <= addr; it matches only if addr lies inside its size.
        """
        # bisect_right - 1 is the last record starting at or before addr,
        # which also covers the exact-hit case.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            return None
        return self.symbols[sym_id], self.files[file_no], line_no

    def symbolize(self, prefix, addr, offset):
        """Return "<prefix><addr> in <function> <file>:<line>", or None.

        offset is a hex string.  The result is only returned, not printed:
        the caller is responsible for output, so printing here as well
        would emit every frame twice.
        """
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        return "%s%s in %s %s:%d" % (
            prefix, addr, function_name, file_name, line_no)
def symbolize_line(system, line):
    """Symbolize one line of the input.

    Stack frame lines look like this:
      #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    For such a line the symbolizer cached for the binary (created on first
    use) is asked for the frame and its answer is returned.  Any other line
    is returned unchanged.

    system: platform name passed on to SystemSymbolizerFactory.
    line:   one line of input text.
    """
    match = re.match(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
    if not match:
        return line
    if DEBUG:
        print(line)
    prefix = match.group(1)
    # frameno = match.group(2)
    addr = match.group(3)
    binary = match.group(4)
    offset = match.group(5)
    if binary not in symbolizers:
        # Prefer a Breakpad symbol file when one is available.
        symbolizers[binary] = (BreakpadSymbolizerFactory(addr, binary) or
                               SystemSymbolizerFactory(system, addr, binary))
    result = symbolizers[binary].symbolize(prefix, addr, offset)
    if result is not None:
        # Use the answer we already have; asking again would query the
        # external tool a second time for every frame.
        return result
    # The cached symbolizer could not resolve this frame: from now on fall
    # back to the system symbolizer for this binary.
    symbolizers[binary] = ChainSymbolizer(
        symbolizers[binary], SystemSymbolizerFactory(system, addr, binary))
    return symbolizers[binary].symbolize(prefix, addr, offset)
def main():
    """Read stdin line by line and print each line symbolized.

    Only Linux and Darwin are handled; on any other system a single
    diagnostic line is printed instead.
    """
    system = os.uname()[0]
    if system not in ('Linux', 'Darwin'):
        print(' '.join(['Unknown system: ', system]))
        return
    for raw_line in sys.stdin:
        print(symbolize_line(system, raw_line).rstrip())
# Run the symbolizer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()