#!/usr/bin/python2.7
# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Verify the operation of LBR using a simple noploop program and perf.

The code only uses constructs that behave the same on Python 2.7 and
Python 3 (explicit floor division, text-mode subprocess pipes, no print
statements).
"""

import contextlib
import errno
import itertools
import os
import os.path
import re
import subprocess
import sys

import numpy


class Error(Exception):
    """Module error class."""


class TestFail(Error):
    """Indicates a test condition failed (as opposed to tool failure)."""


@contextlib.contextmanager
def CleanupFile(path):
    """Context manager that deletes path on exit.

    A path that does not exist on exit is ignored, so that a failure inside
    the managed block (for instance the tool that should have created the
    file) is not masked by an OSError from the cleanup itself.

    @param path: Filename to remove when the with-block is left.
    """
    try:
        yield
    finally:
        try:
            os.remove(path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise


# Sink for the stderr of the perf subprocesses started by this module.
DEVNULL = open('/dev/null', 'w')


class Mmap(object):
    """Represents a memory map, and does the (un)mapping arithmetic."""

    def __init__(self, start, length, pgoff):
        """Store the three numbers describing a mapping.

        @param start: Virtual address at which the mapping starts.
        @param length: Length of the mapping.
        @param pgoff: Offset into the mapped file.
        """
        self.start = start
        self.length = length
        self.pgoff = pgoff

    def __repr__(self):
        return '[%x(%x) @ %x]' % (self.start, self.length, self.pgoff)

    def Map(self, ip):
        """Turns ip from a virtual mapped address back to a dso address.

        (Frankly I think these are named backwards. This follows the naming
        convention of perf's struct map.)
        """
        # See perf's util/map.h: map__map_ip()
        return (ip + self.pgoff) - self.start

    def Unmap(self, rip):
        """Turns ip from a dso address into a virtual mapped address."""
        # See perf's util/map.h: map__unmap_ip()
        return self.start + (rip - self.pgoff)

    # Matches one PERF_RECORD_MMAP line of the `perf report -D` raw dump.
    # Literal brackets and parentheses are backslash-escaped; this is
    # equivalent to the one-character sets ([[], []], [(], [)]) but does not
    # trigger the "possible nested set" FutureWarning on Python 3.7+.
    MMAP_LINE_RE = re.compile(
        r'(?P<event_ts>\d+) '
        r'(?P<event_offset>0x[0-9a-fA-F]+|0) '
        r'\[(?P<event_size>0x[0-9a-fA-F]+|0)\]: '
        r'PERF_RECORD_MMAP '
        r'(?P<pid>-?\d+)/(?P<tid>-?\d+): '
        r'\['
        r'(?P<start>0x[0-9a-fA-F]+|0)'
        r'\((?P<length>0x[0-9a-fA-F]+|0)\) @ '
        r'(?P<pgoff>0x[0-9a-fA-F]+|0)'
        r'\]: '
        r'((?P<executable>[rx]) )?'
        r'(?P<filename>.*)')

    @staticmethod
    def GetFromPerfData(perf_data_filename, mmap_filename):
        """Parse perf_data_filename and find how mmap_filename was mapped.

        @param perf_data_filename: perf.data filename.
        @param mmap_filename: Look for this mmap.

        @returns: Mmap object representing the map for mmap_filename, or
                None if the dump contains no record for that filename.

        @raises Error: a line mentioning PERF_RECORD_MMAP does not have the
                expected format.
        """
        result = None
        # universal_newlines=True makes the pipe yield text on Python 3 as
        # well, so the str comparisons and the str regex below keep working.
        raw_trace_proc = subprocess.Popen(
            ('perf', 'report', '-D', '-i', perf_data_filename),
            stdout=subprocess.PIPE, stderr=DEVNULL,
            universal_newlines=True)
        try:
            for line in raw_trace_proc.stdout:
                if 'PERF_RECORD_MMAP' not in line:
                    continue
                match = Mmap.MMAP_LINE_RE.match(line)
                if not match:
                    raise Error(
                        'Unexpected format for MMAP record in raw dump:\n' +
                        line)
                if match.group('filename') == mmap_filename:
                    args = match.group('start', 'length', 'pgoff')
                    result = Mmap(*(int(x, 16) for x in args))
                    break
            for line in raw_trace_proc.stdout:
                # Skip rest of output
                pass
        finally:
            # Also reached when Error is raised above: close the pipe and
            # reap the child so that it is not left behind blocked on a
            # pipe nobody reads.
            raw_trace_proc.stdout.close()
            raw_trace_proc.wait()
        return result


# Event names that are handed to perf as raw event codes instead.
RAW_EVENT_CODES = {
    'br_inst_retired.all_branches': 'r4c4',
}


def TranslateEvents(events):
    """Replace event names listed in RAW_EVENT_CODES by their raw code.

    @param events: iterable of event name strings.

    @returns: list of the same length; names without an entry in
            RAW_EVENT_CODES are passed through unchanged.
    """
    return [RAW_EVENT_CODES.get(e, e) for e in events]


# This is the right value for SandyBridge, IvyBridge and Haswell, at least.
# See Intel manual vol. 3B, 17.4.8 LBR
# TODO: Consider detecting if 16 is the correct branch buffer length based on
# the uarch. However, all uarchs we run on have a 16-long buffer.
BRANCH_BUFFER_LENGTH = 16


def EstimateExpectedSamples(loops, count):
    """Calculate the number of SAMPLE events expected.

    ie, expect estimate * BRANCH_BUFFER_LENGTH branches to be sampled.
    Incorporates the "observer effect": includes branches caused by
    returning from PMU interrupts. Includes one extra sample due to
    alignment of samples in the series of branches. This sample can be
    expected "most" of the time, but it is not incorrect for it to be
    missing.

    All arithmetic is integer (floor) division, written as // so that the
    result is the same on Python 2 and Python 3.

    @param loops: the number of noploop branches executed (integer).
    @param count: the event sampling period. ie, a sample should be
            collected every count branches (integer; count == 1 divides
            by zero).

    @returns: the expected number of samples (integer).
    """
    sample_count = 1  # assume program prolog takes one sample
    all_branches = loops
    loop_samples = loops // (count - 1)
    while loop_samples >= 1:
        all_branches += loop_samples
        # compounding branches caused by samples caused by samples caused ...
        loop_samples = loop_samples // (count - 1)
    sample_count += all_branches // count
    sample_count += 1  # due to alignment
    return sample_count


def _CountRecordedBranches(perf_data_filename, dso_name, branch_addresses):
    """Count the branches recorded in perf_data_filename using perf report.

    Count the total number of branches recorded, and also the count
    recorded at a specific branch.

    @param perf_data_filename: perf data filename
    @param dso_name: dso that the branch specified by branch_addresses
            pertains to.
    @param branch_addresses: pair of (source, target) addresses specifying
            the branch within dso_name to count.

    @returns: pair with the total branches recorded, and the count for the
            specified branch.

    @raises Error: a branch inside dso_name was reported, but the perf data
            holds no mmap record for dso_name to translate its addresses.
    """
    mmap = Mmap.GetFromPerfData(perf_data_filename, dso_name)
    out = subprocess.check_output(
        ('perf', 'report', '-i', perf_data_filename, '-nv', '-s',
         'dso_from,symbol_from,dso_to,symbol_to'),
        stderr=DEVNULL, universal_newlines=True)
    total_sampled_branches = 0
    branch_samples = 0
    for line in out.splitlines():
        if not line or line.startswith('#'):
            continue
        # NOTE(review): the column positions below are the ones this code
        # has always assumed for `perf report -nv` with the sort keys given
        # above; confirm them against the perf version in use.
        record = line.split()
        samples = int(record[1])
        dso_from = record[2]
        raw_from_address = int(record[3], 16)
        dso_to = record[7]
        raw_to_address = int(record[8], 16)
        # including non-loop branches
        total_sampled_branches += samples
        if not (dso_from == dso_to == dso_name):
            continue
        if mmap is None:
            # Fail with a module error rather than an AttributeError on None.
            raise Error('No PERF_RECORD_MMAP record for %s found in %s' %
                        (dso_name, perf_data_filename))
        from_address = mmap.Map(raw_from_address)
        to_address = mmap.Map(raw_to_address)
        if (from_address, to_address) == branch_addresses:
            branch_samples += samples  # should only match once.
    return total_sampled_branches, branch_samples


def GatherPerfBranchSamples(noploop, branch_addresses, events, count,
                            progress_func=lambda i, j: None):
    """Run perf record -b with the given events, and noploop program.

    Expects to record the branch specified by branch_addresses.

    @param noploop: Path to noploop binary. It should take one argument
            (number of loop iterations) and produce no output.
    @param branch_addresses: pair of branch (source, target) addresses.
    @param events: Comma-separated value to pass to the '-e' arg of perf
            record, which determines when the branch buffer is sampled.
            ':u' will be appended to each event in order to sample only
            userspace branches. Some events may be translated to raw event
            codes if necessary.
    @param count: Event period to sample.
    @param progress_func: Called as progress_func(i, j) before each run
            (i in 0..9 selects the loop count, j in 0..4 the repetition)
            and as progress_func(-1, -1) once all runs are done.

    @returns: List of dicts containing facts about the executions of
            noploop ('loops' and 'branch_count').

    @raises TestFail: the number of samples seen is neither the estimate nor
            the estimate minus one, or the expected branch was not seen.
    """
    events = TranslateEvents(events.split(','))
    events = ','.join(e + ':u' for e in events)
    # The dso name does not depend on the run, so compute it once.
    noploop_dso_name = os.path.abspath(noploop)
    facts = []
    for i, j in itertools.product(range(10), range(5)):
        progress_func(i, j)
        loops = (i + 1) * 10000000  # (i+1) * 10 million
        fact = {'loops': loops}
        perf_data = 'perf.lbr.noploop.%d.%d.data' % (loops, j)
        with CleanupFile(perf_data):
            subprocess.check_call(
                ('perf', 'record', '-o', perf_data,
                 '-b', '-e', events, '-c', '%d' % count,
                 noploop, '%d' % loops),
                stderr=DEVNULL)
            total_sampled_branches, branch_samples = _CountRecordedBranches(
                perf_data, noploop_dso_name, branch_addresses)
        fact['branch_count'] = branch_samples
        # Each sample contributes BRANCH_BUFFER_LENGTH branch records.
        total_samples = total_sampled_branches // BRANCH_BUFFER_LENGTH
        total_expected_samples = EstimateExpectedSamples(loops, count)
        if not (total_samples == total_expected_samples or
                total_samples == total_expected_samples - 1):  # alignment
            raise TestFail('Saw the wrong number of samples: '
                           'saw %d, expected %d or %d' %
                           (total_samples, total_expected_samples,
                            total_expected_samples - 1))
        if fact['branch_count'] == 0:
            raise TestFail('No matching branch records found.')
        facts.append(fact)
    progress_func(-1, -1)  # Finished
    return facts


def ReadBranchAddressesFile(filename):
    """Read whitespace-separated hexadecimal addresses from filename.

    @param filename: text file holding the addresses.

    @returns: tuple with one integer per address found in the file.
    """
    with open(filename, 'r') as f:
        branch = tuple(int(x, 16) for x in f.read().split())
    return branch


def main():
    """Verify the operation of LBR using a simple noploop program and perf."""
    # Only this stand-alone entry point needs the regression helpers, so the
    # sibling module is imported here; the functions above stay importable
    # without it.
    import stats_utils

    def _Progress(i, j):
        """Print one row per loop count, one column per repetition."""
        write = sys.stdout.write
        if i == -1 and j == -1:  # Finished
            write('\n')
            return
        if j == 0:
            if i != 0:
                write('\n')
            write('%d :' % i)
        write(' %d' % j)
        sys.stdout.flush()

    branch = ReadBranchAddressesFile('src/noploop_branch.txt')
    facts = GatherPerfBranchSamples('src/noploop', branch,
                                    'br_inst_retired.all_branches',
                                    10000,
                                    progress_func=_Progress)
    # numpy.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases.
    dt = numpy.dtype([('loops', int), ('branch_count', int)])
    a = stats_utils.FactsToNumpyArray(facts, dt)
    (slope, intercept), r2 = stats_utils.LinearRegression(
        a['loops'], a['branch_count'])
    for f in facts:
        print('%s' % (f,))
    print('slope: %s' % (slope,))
    print('intercept: %s' % (intercept,))
    print('r-squared: %s' % (r2,))


if __name__ == '__main__':
    main()