/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/sendfile.h>
#include <time.h>
#include <zlib.h>
#define NELEM(x) ((int) (sizeof(x) / sizeof((x)[0])))
/* Command line options */
static int g_traceDurationSeconds = 5;
static bool g_traceSchedSwitch = false;
static bool g_traceCpuFrequency = false;
static bool g_traceCpuIdle = false;
static bool g_traceDisk = false;
static bool g_traceGovernorLoad = false;
static bool g_traceWorkqueue = false;
static bool g_traceOverwrite = false;
static int g_traceBufferSizeKB = 2048;
static bool g_compress = false;
/* Global state */
static bool g_traceAborted = false;
/* Sys file paths */
static const char* k_traceClockPath =
"/sys/kernel/debug/tracing/trace_clock";
static const char* k_traceBufferSizePath =
"/sys/kernel/debug/tracing/buffer_size_kb";
static const char* k_tracingOverwriteEnablePath =
"/sys/kernel/debug/tracing/options/overwrite";
static const char* k_schedSwitchEnablePath =
"/sys/kernel/debug/tracing/events/sched/sched_switch/enable";
static const char* k_schedWakeupEnablePath =
"/sys/kernel/debug/tracing/events/sched/sched_wakeup/enable";
static const char* k_cpuFreqEnablePath =
"/sys/kernel/debug/tracing/events/power/cpu_frequency/enable";
static const char* k_cpuIdleEnablePath =
"/sys/kernel/debug/tracing/events/power/cpu_idle/enable";
static const char* k_governorLoadEnablePath =
"/sys/kernel/debug/tracing/events/cpufreq_interactive/enable";
static const char* k_workqueueEnablePath =
"/sys/kernel/debug/tracing/events/workqueue/enable";
static const char* k_diskEnablePaths[] = {
"/sys/kernel/debug/tracing/events/ext4/ext4_sync_file_enter/enable",
"/sys/kernel/debug/tracing/events/ext4/ext4_sync_file_exit/enable",
"/sys/kernel/debug/tracing/events/block/block_rq_issue/enable",
"/sys/kernel/debug/tracing/events/block/block_rq_complete/enable",
};
static const char* k_tracingOnPath =
"/sys/kernel/debug/tracing/tracing_on";
static const char* k_tracePath =
"/sys/kernel/debug/tracing/trace";
static const char* k_traceMarkerPath =
"/sys/kernel/debug/tracing/trace_marker";
// Write a string to a file, returning true if the write was successful.
bool writeStr(const char* filename, const char* str)
{
int fd = open(filename, O_WRONLY);
if (fd == -1) {
fprintf(stderr, "error opening %s: %s (%d)\n", filename,
strerror(errno), errno);
return false;
}
bool ok = true;
ssize_t len = strlen(str);
if (write(fd, str, len) != len) {
fprintf(stderr, "error writing to %s: %s (%d)\n", filename,
strerror(errno), errno);
ok = false;
}
close(fd);
return ok;
}
// Enable or disable a kernel option by writing a "1" or a "0" into a /sys file.
static bool setKernelOptionEnable(const char* filename, bool enable)
{
return writeStr(filename, enable ? "1" : "0");
}
// Enable or disable a collection of kernel options by writing a "1" or a "0" into each /sys file.
static bool setMultipleKernelOptionsEnable(const char** filenames, size_t count, bool enable)
{
bool result = true;
for (size_t i = 0; i < count; i++) {
result &= setKernelOptionEnable(filenames[i], enable);
}
return result;
}
// Enable or disable overwriting of the kernel trace buffers. Disabling this
// will cause tracing to stop once the trace buffers have filled up.
static bool setTraceOverwriteEnable(bool enable)
{
return setKernelOptionEnable(k_tracingOverwriteEnablePath, enable);
}
// Enable or disable tracing of the kernel scheduler switching.
static bool setSchedSwitchTracingEnable(bool enable)
{
bool ok = true;
ok &= setKernelOptionEnable(k_schedSwitchEnablePath, enable);
ok &= setKernelOptionEnable(k_schedWakeupEnablePath, enable);
return ok;
}
// Enable or disable tracing of the CPU clock frequency.
static bool setCpuFrequencyTracingEnable(bool enable)
{
return setKernelOptionEnable(k_cpuFreqEnablePath, enable);
}
// Enable or disable tracing of CPU idle events.
static bool setCpuIdleTracingEnable(bool enable)
{
return setKernelOptionEnable(k_cpuIdleEnablePath, enable);
}
// Enable or disable tracing of the interactive CPU frequency governor's idea of
// the CPU load.
static bool setGovernorLoadTracingEnable(bool enable)
{
return setKernelOptionEnable(k_governorLoadEnablePath, enable);
}
// Enable or disable tracing of the kernel workqueues.
static bool setWorkqueueTracingEnabled(bool enable)
{
return setKernelOptionEnable(k_workqueueEnablePath, enable);
}
// Enable or disable tracing of disk I/O.
static bool setDiskTracingEnabled(bool enable)
{
return setMultipleKernelOptionsEnable(k_diskEnablePaths, NELEM(k_diskEnablePaths), enable);
}
// Enable or disable kernel tracing.
static bool setTracingEnabled(bool enable)
{
return setKernelOptionEnable(k_tracingOnPath, enable);
}
// Clear the contents of the kernel trace.
static bool clearTrace()
{
int traceFD = creat(k_tracePath, 0);
if (traceFD == -1) {
fprintf(stderr, "error truncating %s: %s (%d)\n", k_tracePath,
strerror(errno), errno);
return false;
}
close(traceFD);
return true;
}
// Set the size of the kernel's trace buffer in kilobytes.
static bool setTraceBufferSizeKB(int size)
{
char str[32] = "1";
int len;
if (size < 1) {
size = 1;
}
snprintf(str, 32, "%d", size);
return writeStr(k_traceBufferSizePath, str);
}
// Enable or disable the kernel's use of the global clock. Disabling the global
// clock will result in the kernel using a per-CPU local clock.
static bool setGlobalClockEnable(bool enable)
{
return writeStr(k_traceClockPath, enable ? "global" : "local");
}
// Check whether a file exists.
static bool fileExists(const char* filename) {
return access(filename, F_OK) != -1;
}
// Enable tracing in the kernel.
static bool startTrace(bool isRoot)
{
bool ok = true;
// Set up the tracing options that don't require root.
ok &= setTraceOverwriteEnable(g_traceOverwrite);
ok &= setSchedSwitchTracingEnable(g_traceSchedSwitch);
ok &= setCpuFrequencyTracingEnable(g_traceCpuFrequency);
ok &= setCpuIdleTracingEnable(g_traceCpuIdle);
if (fileExists(k_governorLoadEnablePath) || g_traceGovernorLoad) {
ok &= setGovernorLoadTracingEnable(g_traceGovernorLoad);
}
ok &= setTraceBufferSizeKB(g_traceBufferSizeKB);
ok &= setGlobalClockEnable(true);
// Set up the tracing options that do require root. The options that
// require root should have errored out earlier if we're not running as
// root.
if (isRoot) {
ok &= setWorkqueueTracingEnabled(g_traceWorkqueue);
ok &= setDiskTracingEnabled(g_traceDisk);
}
// Enable tracing.
ok &= setTracingEnabled(true);
if (!ok) {
fprintf(stderr, "error: unable to start trace\n");
}
return ok;
}
// Disable tracing in the kernel.
static void stopTrace(bool isRoot)
{
// Disable tracing.
setTracingEnabled(false);
// Set the options back to their defaults.
setTraceOverwriteEnable(true);
setSchedSwitchTracingEnable(false);
setCpuFrequencyTracingEnable(false);
if (fileExists(k_governorLoadEnablePath)) {
setGovernorLoadTracingEnable(false);
}
setGlobalClockEnable(false);
if (isRoot) {
setWorkqueueTracingEnabled(false);
setDiskTracingEnabled(false);
}
// Note that we can't reset the trace buffer size here because that would
// clear the trace before we've read it.
}
// Read the current kernel trace and write it to stdout.
static void dumpTrace()
{
int traceFD = open(k_tracePath, O_RDWR);
if (traceFD == -1) {
fprintf(stderr, "error opening %s: %s (%d)\n", k_tracePath,
strerror(errno), errno);
return;
}
if (g_compress) {
z_stream zs;
uint8_t *in, *out;
int result, flush;
bzero(&zs, sizeof(zs));
result = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
if (result != Z_OK) {
fprintf(stderr, "error initializing zlib: %d\n", result);
close(traceFD);
return;
}
const size_t bufSize = 64*1024;
in = (uint8_t*)malloc(bufSize);
out = (uint8_t*)malloc(bufSize);
flush = Z_NO_FLUSH;
zs.next_out = out;
zs.avail_out = bufSize;
do {
if (zs.avail_in == 0) {
// More input is needed.
result = read(traceFD, in, bufSize);
if (result < 0) {
fprintf(stderr, "error reading trace: %s (%d)\n",
strerror(errno), errno);
result = Z_STREAM_END;
break;
} else if (result == 0) {
flush = Z_FINISH;
} else {
zs.next_in = in;
zs.avail_in = result;
}
}
if (zs.avail_out == 0) {
// Need to write the output.
result = write(STDOUT_FILENO, out, bufSize);
if ((size_t)result < bufSize) {
fprintf(stderr, "error writing deflated trace: %s (%d)\n",
strerror(errno), errno);
result = Z_STREAM_END; // skip deflate error message
zs.avail_out = bufSize; // skip the final write
break;
}
zs.next_out = out;
zs.avail_out = bufSize;
}
} while ((result = deflate(&zs, flush)) == Z_OK);
if (result != Z_STREAM_END) {
fprintf(stderr, "error deflating trace: %s\n", zs.msg);
}
if (zs.avail_out < bufSize) {
size_t bytes = bufSize - zs.avail_out;
result = write(STDOUT_FILENO, out, bytes);
if ((size_t)result < bytes) {
fprintf(stderr, "error writing deflated trace: %s (%d)\n",
strerror(errno), errno);
}
}
result = deflateEnd(&zs);
if (result != Z_OK) {
fprintf(stderr, "error cleaning up zlib: %d\n", result);
}
free(in);
free(out);
} else {
ssize_t sent = 0;
while ((sent = sendfile(STDOUT_FILENO, traceFD, NULL, 64*1024*1024)) > 0);
if (sent == -1) {
fprintf(stderr, "error dumping trace: %s (%d)\n", strerror(errno),
errno);
}
}
close(traceFD);
}
// Print the command usage help to stderr.
static void showHelp(const char *cmd)
{
fprintf(stderr, "usage: %s [options]\n", cmd);
fprintf(stderr, "options include:\n"
" -b N use a trace buffer size of N KB\n"
" -c trace into a circular buffer\n"
" -d trace disk I/O\n"
" -f trace CPU frequency changes\n"
" -l trace CPU frequency governor load\n"
" -s trace the kernel scheduler switches\n"
" -t N trace for N seconds [defualt 5]\n"
" -w trace the kernel workqueue\n"
" -z compress the trace dump\n");
}
static void handleSignal(int signo) {
g_traceAborted = true;
}
static void registerSigHandler() {
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
sa.sa_handler = handleSignal;
sigaction(SIGHUP, &sa, NULL);
sigaction(SIGINT, &sa, NULL);
sigaction(SIGQUIT, &sa, NULL);
sigaction(SIGTERM, &sa, NULL);
}
int main(int argc, char **argv)
{
bool isRoot = (getuid() == 0);
if (argc == 2 && 0 == strcmp(argv[1], "--help")) {
showHelp(argv[0]);
exit(0);
}
for (;;) {
int ret;
ret = getopt(argc, argv, "b:cidflst:wz");
if (ret < 0) {
break;
}
switch(ret) {
case 'b':
g_traceBufferSizeKB = atoi(optarg);
break;
case 'c':
g_traceOverwrite = true;
break;
case 'i':
g_traceCpuIdle = true;
break;
case 'l':
g_traceGovernorLoad = true;
break;
case 'd':
if (!isRoot) {
fprintf(stderr, "error: tracing disk activity requires root privileges\n");
exit(1);
}
g_traceDisk = true;
break;
case 'f':
g_traceCpuFrequency = true;
break;
case 's':
g_traceSchedSwitch = true;
break;
case 't':
g_traceDurationSeconds = atoi(optarg);
break;
case 'w':
if (!isRoot) {
fprintf(stderr, "error: tracing kernel work queues requires root privileges\n");
exit(1);
}
g_traceWorkqueue = true;
break;
case 'z':
g_compress = true;
break;
default:
fprintf(stderr, "\n");
showHelp(argv[0]);
exit(-1);
break;
}
}
registerSigHandler();
bool ok = startTrace(isRoot);
if (ok) {
printf("capturing trace...");
fflush(stdout);
// We clear the trace after starting it because tracing gets enabled for
// each CPU individually in the kernel. Having the beginning of the trace
// contain entries from only one CPU can cause "begin" entries without a
// matching "end" entry to show up if a task gets migrated from one CPU to
// another.
ok = clearTrace();
if (ok) {
// Sleep to allow the trace to be captured.
struct timespec timeLeft;
timeLeft.tv_sec = g_traceDurationSeconds;
timeLeft.tv_nsec = 0;
do {
if (g_traceAborted) {
break;
}
} while (nanosleep(&timeLeft, &timeLeft) == -1 && errno == EINTR);
}
}
// Stop the trace and restore the default settings.
stopTrace(isRoot);
if (ok) {
if (!g_traceAborted) {
printf(" done\nTRACE:\n");
fflush(stdout);
dumpTrace();
} else {
printf("\ntrace aborted.\n");
fflush(stdout);
}
clearTrace();
} else {
fprintf(stderr, "unable to start tracing\n");
}
// Reset the trace buffer size to 1.
setTraceBufferSizeKB(1);
return g_traceAborted ? 1 : 0;
}