builtin-sched.c - Android社区 - https://www.androidos.net.cn/

Kernel | 3.4

下载查看原文件

#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/evlist.h"
#include "util/cache.h"
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <sys/prctl.h>
#include <sys/resource.h>

#include <semaphore.h>
#include <pthread.h>
#include <math.h>

/*
 * Name of the input to read.  It is not assigned anywhere in this part of
 * the file - presumably set by option parsing; TODO confirm.
 */
static const char		*input_name;

/* Default, comma-separated list of sort keys; sort_order starts out pointing at it. */
static char			default_sort_order[] = "avg, max, switch, runtime";
static const char		*sort_order = default_sort_order;

/* Defaults to -1; its consumer is not visible in this part of the file. */
static int			profile_cpu = -1;

/* prctl() option used by thread_func() to name each replay thread. */
#define PR_SET_NAME		15               /* Set process name */
#define MAX_CPUS		4096

/*
 * Values measured by the calibrate_*_measurement_overhead() helpers.
 * burn_nsecs() adds run_measurement_overhead to the current time when
 * deciding whether it has spun long enough.
 */
static u64			run_measurement_overhead;
static u64			sleep_measurement_overhead;

/* COMM_LEN is the size of task_desc.comm, including the terminating NUL. */
#define COMM_LEN		20
#define SYM_LEN			129

/* Size of pid_to_task[]; register_pid() asserts pid < MAX_PID. */
#define MAX_PID			65536

/* Number of entries in tasks[]; incremented by register_pid(). */
static unsigned long		nr_tasks;

/* Forward declaration so that struct task_desc can hold pointers to atoms. */
struct sched_atom;

/*
 * One replayed task: its identity, its recorded sequence of scheduling
 * atoms, and the thread and semaphores used to replay it.
 */
struct task_desc {
	/* index of this task in tasks[] (assigned by register_pid()) */
	unsigned long		nr;
	unsigned long		pid;
	char			comm[COMM_LEN];

	/* number of entries in atoms[] */
unsigned long		nr_events;
	/* index of the atom thread_func() is currently processing */
	unsigned long		curr_event;
	/* array of atom pointers, grown one slot at a time by get_new_event() */
	struct sched_atom	**atoms;

	/* thread created for this task in create_tasks() */
pthread_t		thread;
	/* (re)initialised to 0 by create_tasks()/wait_for_tasks(); no waiter or poster is visible in this section */
	sem_t			sleep_sem;

	/* posted by thread_func() at the start of each pass, waited on by wait_for_tasks() */
sem_t			ready_for_work;
	/* posted by thread_func() once all atoms have been processed, waited on by wait_for_tasks() */
	sem_t			work_done_sem;

	/* task-clock delta measured by thread_func() across one pass over atoms[] */
u64			cpu_usage;
};

/* Kind of a replay atom; see process_sched_event() for how each is replayed. */
enum sched_event_type {
	/* busy-loop for the atom's duration */
	SCHED_EVENT_RUN,
	/* block on the atom's wait_sem, if it has one */
	SCHED_EVENT_SLEEP,
	/* post the atom's wait_sem, if it has one */
	SCHED_EVENT_WAKEUP,
	/* no action is taken when replaying */
	SCHED_EVENT_MIGRATION,
};

/* One recorded scheduling event of a task, replayed by process_sched_event(). */
struct sched_atom {
	enum sched_event_type	type;
	/* set to 1 on a SLEEP atom once add_sched_event_wakeup() pairs a wakeup with it */
	int			specific_wait;
	u64			timestamp;
	/* for RUN atoms: nanoseconds handed to burn_nsecs() */
	u64			duration;
	/* index of this atom within its task's atoms[] */
	unsigned long		nr;
	/* semaphore shared between a SLEEP atom and the WAKEUP atom paired with it; NULL if unpaired */
	sem_t			*wait_sem;
	/* for WAKEUP atoms: the task being woken */
	struct task_desc	*wakee;
};

/* PID -> task descriptor lookup table, filled by register_pid(). */
static struct task_desc		*pid_to_task[MAX_PID];

/* Growable array of all registered tasks, indexed by task_desc.nr. */
static struct task_desc		**tasks;

/*
 * Start gate: taken by the parent in create_tasks() and released in
 * wait_for_tasks(); every replay thread locks and immediately unlocks it
 * before processing its atoms.
 */
static pthread_mutex_t		start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Timestamp taken at the top of wait_for_tasks(). */
static u64			start_time;

/*
 * End-of-pass gate: replay threads lock and unlock it after posting
 * work_done_sem, so they stay blocked while the parent holds it.
 */
static pthread_mutex_t		work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Number of atoms of each kind created by the add_sched_event_*() helpers. */
static unsigned long		nr_run_events;
static unsigned long		nr_sleep_events;
static unsigned long		nr_wakeup_events;

/* nr_sleep_corrections is printed and reset by run_one_test(). */
static unsigned long		nr_sleep_corrections;
/* RUN events merged into the preceding RUN atom by add_sched_event_run(). */
static unsigned long		nr_run_events_optimized;

/* Wakeups whose target's last atom was not a SLEEP / was already paired. */
static unsigned long		targetless_wakeups;
static unsigned long		multitarget_wakeups;

/*
 * CPU time of the latest pass as computed by wait_for_tasks(): summed over
 * the replay threads (cpu_usage) and from the parent's rusage
 * (parent_cpu_usage), each with a 9:1 weighted running average.
 */
static u64			cpu_usage;
static u64			runavg_cpu_usage;
static u64			parent_cpu_usage;
static u64			runavg_parent_cpu_usage;

/* Wall-clock statistics maintained by run_one_test(). */
static unsigned long		nr_runs;
static u64			sum_runtime;
static u64			sum_fluct;
static u64			run_avg;

/*
 * The remaining counters are not referenced in this part of the file;
 * NOTE(review): meanings are only suggested by their names - confirm
 * against the code that updates them.
 */
static unsigned int		replay_repeat = 10;
static unsigned long		nr_timestamps;
static unsigned long		nr_unordered_timestamps;
static unsigned long		nr_state_machine_bugs;
static unsigned long		nr_context_switch_bugs;
static unsigned long		nr_events;
static unsigned long		nr_lost_chunks;
static unsigned long		nr_lost_events;

/*
 * NOTE(review): presumably one character per kernel task state; the code
 * that indexes this string is outside this section - confirm there.
 */
#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

/*
 * State stored in struct work_atom.  The transitions are implemented
 * outside this section; THREAD_SLEEPING is explicitly 0.
 */
enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

/*
 * One entry on a work_atoms.work_list.
 * NOTE(review): the code that fills these fields is outside this section;
 * the names suggest per-cycle sched-out / wake-up / sched-in timestamps
 * plus accumulated runtime - confirm against the users.
 */
struct work_atom {
	struct list_head	list;
	enum thread_state	state;
	u64			sched_out_time;
	u64			wake_up_time;
	u64			sched_in_time;
	u64			runtime;
};

/*
 * Per-thread collection of work_atom entries, linkable into an rb-tree
 * through 'node'.
 * NOTE(review): the latency/runtime aggregates are maintained outside this
 * section; their exact units and update rules should be confirmed there.
 */
struct work_atoms {
	struct list_head	work_list;
	struct thread		*thread;
	struct rb_node		node;
	u64			max_lat;
	u64			max_lat_at;
	u64			total_lat;
	u64			nb_atoms;
	u64			total_runtime;
};

/* Comparison callback taking two work_atoms and returning an int. */
typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

/*
 * Two rb-tree roots for work_atoms.
 * NOTE(review): presumably the unsorted and the sorted view; they are
 * populated outside this section - confirm.
 */
static struct rb_root		atom_root, sorted_atom_root;

/* Global totals; not updated in this part of the file. */
static u64			all_runtime;
static u64			all_count;

/*
 * Return the current CLOCK_MONOTONIC reading folded into a single
 * nanosecond count.
 */
static u64 get_nsecs(void)
{
	struct timespec now;
	u64 nsecs;

	clock_gettime(CLOCK_MONOTONIC, &now);

	nsecs = now.tv_sec * 1000000000ULL;
	nsecs += now.tv_nsec;

	return nsecs;
}

/*
 * Spin on the monotonic clock until about @nsecs nanoseconds have passed
 * since entry.  The calibrated run_measurement_overhead is added to the
 * current time so that the cost of the measurement itself is accounted
 * for.  The clock is always sampled at least once inside the loop.
 */
static void burn_nsecs(u64 nsecs)
{
	u64 begin = get_nsecs();
	u64 now;

	for (;;) {
		now = get_nsecs();
		if (now + run_measurement_overhead >= begin + nsecs)
			break;
	}
}

/*
 * Sleep for @nsecs nanoseconds using nanosleep().
 *
 * The interval is split into whole seconds and the remaining nanoseconds.
 * There are 1000000000 nanoseconds in a second; dividing by 999999999 (as
 * the code used to) yields a wrong timespec for any interval of one second
 * or more, e.g. 999999999 ns became exactly 1 s.
 *
 * The return value of nanosleep() is deliberately ignored, so an
 * interrupted sleep is not resumed.
 */
static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_sec = nsecs / 1000000000ULL;
	ts.tv_nsec = nsecs % 1000000000ULL;

	nanosleep(&ts, NULL);
}

/*
 * Measure how long a zero-length burn_nsecs() call takes.  The smallest
 * of ten samples (capped at one second) is stored in
 * run_measurement_overhead and printed.
 */
static void calibrate_run_measurement_overhead(void)
{
	u64 best = 1000000000ULL;
	int round;

	for (round = 0; round < 10; round++) {
		u64 before, after, elapsed;

		before = get_nsecs();
		burn_nsecs(0);
		after = get_nsecs();

		elapsed = after - before;
		if (elapsed < best)
			best = elapsed;
	}

	run_measurement_overhead = best;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", best);
}

/*
 * Measure the overhead of a short sleep: time ten 10000 ns sleeps, keep
 * the smallest sample (capped at one second), subtract the requested
 * 10000 ns and store the remainder in sleep_measurement_overhead.
 */
static void calibrate_sleep_measurement_overhead(void)
{
	const u64 probe_nsecs = 10000;
	u64 best = 1000000000ULL;
	int round;

	for (round = 0; round < 10; round++) {
		u64 before, after, elapsed;

		before = get_nsecs();
		sleep_nsecs(probe_nsecs);
		after = get_nsecs();

		elapsed = after - before;
		if (elapsed < best)
			best = elapsed;
	}

	/* what is left after removing the requested sleep is the overhead */
	best -= probe_nsecs;
	sleep_measurement_overhead = best;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", best);
}

/*
 * Allocate a zeroed atom, stamp it with @timestamp and append it to
 * @task's atoms[] array, growing the array by one slot.
 *
 * Returns the new atom; its 'nr' is its index in task->atoms[].
 * Allocation failures are treated as fatal via BUG_ON(), for the atom
 * itself as well as for the array (the atom used to be dereferenced
 * without a check).
 */
static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	BUG_ON(!event);

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

/*
 * Return the most recently appended atom of @task, or NULL when the task
 * has no atoms yet.
 */
static struct sched_atom *last_event(struct task_desc *task)
{
	unsigned long count = task->nr_events;

	return count ? task->atoms[count - 1] : NULL;
}

/*
 * Record that @task ran for @duration nanoseconds.
 *
 * If the task's latest atom is already a RUN atom, the duration is folded
 * into it (counted in nr_run_events_optimized) instead of creating a new
 * atom; otherwise a new RUN atom stamped with @timestamp is appended.
 */
static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
	struct sched_atom *prev = last_event(task);
	struct sched_atom *run;

	if (!prev || prev->type != SCHED_EVENT_RUN) {
		run = get_new_event(task, timestamp);
		run->type = SCHED_EVENT_RUN;
		run->duration = duration;

		nr_run_events++;
		return;
	}

	/* back-to-back RUN events: extend the existing atom */
	nr_run_events_optimized++;
	prev->duration += duration;
}

/*
 * Append a WAKEUP atom to @task that targets @wakee.
 *
 * The wakeup is paired with @wakee's latest atom when that atom is a SLEEP
 * which no other wakeup has claimed yet: a semaphore is allocated and
 * shared between the two atoms, so that replaying the wakeup posts what
 * the sleep waits on.  Otherwise the WAKEUP atom is left without a
 * semaphore and only a statistics counter is bumped:
 *   - targetless_wakeups:  wakee has no atom, or its last one is no SLEEP
 *   - multitarget_wakeups: the sleep already has a waker
 *
 * Failure to allocate the semaphore is fatal (BUG_ON); the pointer used to
 * be passed to sem_init() unchecked.
 */
static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
		       struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	BUG_ON(!wakee_event->wait_sem);
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}

/*
 * Append a SLEEP atom stamped with @timestamp to @task and count it in
 * nr_sleep_events.  @task_state is accepted but not used.
 */
static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
		      u64 task_state __used)
{
	struct sched_atom *sleep_atom;

	sleep_atom = get_new_event(task, timestamp);
	sleep_atom->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}

/*
 * Look up the task descriptor for @pid, creating and registering it on
 * first use.
 *
 * @pid:  must be below MAX_PID (asserted).
 * @comm: task name; copied into the descriptor and truncated to
 *        COMM_LEN - 1 characters if longer.
 *
 * A new task gets the next free slot in tasks[] and starts with an initial
 * SLEEP atom.  Returns the (possibly pre-existing) descriptor.  Allocation
 * failures are fatal (BUG_ON).
 */
static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = zalloc(sizeof(*task));
	BUG_ON(!task);
	task->pid = pid;
	task->nr = nr_tasks;
	/* bounded copy: comm[] holds only COMM_LEN bytes including the NUL */
	snprintf(task->comm, sizeof(task->comm), "%s", comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	/* size the array by its own element type */
	tasks = realloc(tasks, nr_tasks * sizeof(*tasks));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	if (verbose)
		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}

static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}

/*
 * Replay a single atom on the calling thread:
 *   RUN       - busy-loop for atom->duration nanoseconds
 *   SLEEP     - wait on the atom's semaphore, if it has one
 *   WAKEUP    - post the atom's semaphore, if it has one
 *   MIGRATION - nothing to do
 * Any other type, or a failing semaphore call, trips BUG_ON().
 * @this_task is not used.
 */
static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
	int err = 0;

	switch (atom->type) {
	case SCHED_EVENT_RUN:
		burn_nsecs(atom->duration);
		return;
	case SCHED_EVENT_MIGRATION:
		return;
	case SCHED_EVENT_SLEEP:
		if (atom->wait_sem)
			err = sem_wait(atom->wait_sem);
		break;
	case SCHED_EVENT_WAKEUP:
		if (atom->wait_sem)
			err = sem_post(atom->wait_sem);
		break;
	default:
		BUG_ON(1);
	}

	/* only the semaphore paths can have set err */
	BUG_ON(err);
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

sum =  ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
	sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;

return sum;
}

static int self_open_counters(void)
{
	struct perf_event_attr attr;
	int fd;

memset(&attr, 0, sizeof(attr));

attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

if (fd < 0)
		die("Error: sys_perf_event_open() syscall returned"
		    "with %d (%s)\n", fd, strerror(errno));
	return fd;
}

/*
 * Read the current value of the counter behind @fd (as opened by
 * self_open_counters()) and return it.  A short or failed read trips
 * BUG_ON().
 */
static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 task_clock;
	int nread = read(fd, &task_clock, sizeof(task_clock));

	BUG_ON(nread != sizeof(task_clock));

	return task_clock;
}

/*
 * Body of every replay thread; @ctx is the thread's struct task_desc.
 *
 * After naming itself ":<comm>" and opening a task-clock counter, the
 * thread loops forever over one replay pass:
 *   1. post ready_for_work, then pass through start_work_mutex
 *      (lock + unlock), which blocks while another thread holds it;
 *   2. process every atom in order, measuring the task-clock delta into
 *      this_task->cpu_usage;
 *   3. post work_done_sem, then pass through work_done_wait_mutex before
 *      starting the next pass.
 * The function never returns.
 */
static void *thread_func(void *ctx)
{
	struct task_desc *this_task = ctx;
	u64 usage_before, usage_after;
	unsigned long idx, err;
	char thread_name[22];
	int counter_fd;

	sprintf(thread_name, ":%s", this_task->comm);
	prctl(PR_SET_NAME, thread_name);
	counter_fd = self_open_counters();

	for (;;) {
		err = sem_post(&this_task->ready_for_work);
		BUG_ON(err);
		err = pthread_mutex_lock(&start_work_mutex);
		BUG_ON(err);
		err = pthread_mutex_unlock(&start_work_mutex);
		BUG_ON(err);

		usage_before = get_cpu_usage_nsec_self(counter_fd);

		for (idx = 0; idx < this_task->nr_events; idx++) {
			this_task->curr_event = idx;
			process_sched_event(this_task, this_task->atoms[idx]);
		}

		usage_after = get_cpu_usage_nsec_self(counter_fd);
		this_task->cpu_usage = usage_after - usage_before;
		err = sem_post(&this_task->work_done_sem);
		BUG_ON(err);

		err = pthread_mutex_lock(&work_done_wait_mutex);
		BUG_ON(err);
		err = pthread_mutex_unlock(&work_done_wait_mutex);
		BUG_ON(err);
	}
}

/*
 * Create one replay thread per registered task.
 *
 * Both gate mutexes (start_work_mutex, work_done_wait_mutex) are taken
 * before any thread is created and are still held on return, so the new
 * threads block at the start gate.  Each task's semaphores are initialised
 * to 0 and its event cursor is reset before its thread is started with a
 * stack of max(16 KiB, PTHREAD_STACK_MIN).
 *
 * The attribute object is destroyed once all threads exist; it used to be
 * left initialised.  Destroying it does not affect threads already
 * created with it.  Any pthread failure trips BUG_ON().
 */
static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}

	err = pthread_attr_destroy(&attr);
	BUG_ON(err);
}

/*
 * Drive one replay pass from the parent thread and collect its CPU usage.
 *
 * The statement order below is the handshake with thread_func() and must
 * not be rearranged.  On entry the caller holds both start_work_mutex and
 * work_done_wait_mutex (taken in create_tasks() or at the end of the
 * previous call / further down in this one).
 */
static void wait_for_tasks(void)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

start_time = get_nsecs();
	cpu_usage = 0;
	/* let threads parked at the end-of-pass gate loop back to the start */
	pthread_mutex_unlock(&work_done_wait_mutex);

	/* wait until every thread has announced it is ready, resetting each semaphore to 0 */
for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	/* close the end-of-pass gate again before any thread can finish this pass */
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

cpu_usage_0 = get_cpu_usage_nsec_parent();

	/* open the start gate: threads begin processing their atoms */
pthread_mutex_unlock(&start_work_mutex);

	/* collect each thread's completion and add up the per-thread CPU time */
for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

cpu_usage_1 = get_cpu_usage_nsec_parent();
	/* running average: seeded with the first non-zero sample, then weighted 9:1 old:new */
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;

	/* same averaging for the rusage delta sampled around the pass */
parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
				   parent_cpu_usage)/10;

	/* close the start gate for the next pass */
ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	/* reset per-task replay state */
for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

/*
 * Run one replay pass through wait_for_tasks(), update the wall-clock
 * statistics (sum_runtime, nr_runs, sum_fluct, run_avg) and print a
 * one-line report: run number, elapsed msecs, running average, and CPU
 * usage (latest / running average).  Pending sleep corrections are
 * reported on an extra line and the counter is cleared.
 */
static void run_one_test(void)
{
	u64 begin, end, elapsed, mean, deviation;

	begin = get_nsecs();
	wait_for_tasks();
	end = get_nsecs();

	elapsed = end - begin;
	sum_runtime += elapsed;
	nr_runs++;

	/* absolute distance of this run from the mean of all runs so far */
	mean = sum_runtime / nr_runs;
	deviation = (elapsed < mean) ? mean - elapsed : elapsed - mean;
	sum_fluct += deviation;

	/* running average, seeded with the first sample, weighted 9:1 old:new */
	if (!run_avg)
		run_avg = elapsed;
	run_avg = (run_avg*9 + elapsed)/10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)elapsed/1000000.0);

	printf("ravg: %0.2f, ",
		(double)run_avg/1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage/1e6,
		(double)runavg_parent_cpu_usage/1e6);
#endif

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}

/*
 * Sanity-check the timing helpers: time a 1000000 ns busy loop and a
 * 1000000 ns sleep and print how long each actually took.
 */
static void test_calibrations(void)
{
	const u64 probe_nsecs = 1000000;
	u64 before, after;

	before = get_nsecs();
	burn_nsecs(probe_nsecs);
	after = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", after - before);

	before = get_nsecs();
	sleep_nsecs(probe_nsecs);
	after = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", after - before);
}

/*
 * Assign ptr.field from raw_field_value(event, "<field>", data), cast to
 * the member's own type.  'ptr' is used with '.', so despite its name it
 * must be a struct object (lvalue), not a pointer.
 */
#define FILL_FIELD(ptr, field, event, data)	\
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

/*
 * Copy sizeof(ptr.array) bytes from the location returned by
 * raw_field_ptr(event, "<array>", data) into the array member ptr.array.
 * The returned pointer is not checked before the copy.
 */
#define FILL_ARRAY(ptr, array, event, data)			\
do {								\
	void *__array = raw_field_ptr(event, #array, data);	\
	memcpy(ptr.array, __array, sizeof(ptr.array));	\
} while(0)

/*
 * Fill the five common_* members (type, flags, preempt_count, pid, tgid)
 * of 'ptr' using FILL_FIELD.
 */
#define FILL_COMMON_FIELDS(ptr, event, data)			\
do {								\
	FILL_FIELD(ptr, common_type, event, data);		\
	FILL_FIELD(ptr, common_flags, event, data);		\
	FILL_FIELD(ptr, common_preempt_count, event, data);	\
	FILL_FIELD(ptr, common_pid, event, data);		\
	FILL_FIELD(ptr, common_tgid, event, data);		\
} while (0)

struct trace_switch_event {
	u32 size;