/**
* @file opd_perfmon.c
* perfmonctl() handling
*
* @remark Copyright 2003 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
*/
#ifdef __ia64__
/* need this for sched_setaffinity() in <sched.h> */
#define _GNU_SOURCE
#include "oprofiled.h"
#include "opd_perfmon.h"
#include "opd_events.h"
#include "op_cpu_type.h"
#include "op_libiberty.h"
#include "op_hw_config.h"
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef HAVE_SCHED_SETAFFINITY
#include <sched.h>
#endif
extern op_cpu cpu_type;
#ifndef HAVE_SCHED_SETAFFINITY
/* many glibc versions do not yet provide sched_setaffinity() */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 1231
#endif
/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof (unsigned long))
typedef struct
{
unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;
#define CPU_SET(cpu, cpusetp) \
((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) \
memset((cpusetp), 0, sizeof(cpu_set_t))
static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
#endif
#ifndef HAVE_PERFMONCTL
#ifndef __NR_perfmonctl
#define __NR_perfmonctl 1175
#endif
static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
#endif
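/* sampling-buffer format id; copied into ctx_smpl_buf_id when the context is created */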
static unsigned char uuid[16] = {
0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
};
static size_t nr_cpus;
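/* per-CPU child state; the parent drives each child with SIGUSR1 (start),
 * SIGUSR2 (stop) and SIGKILL (tear-down), and reads the up pipe at startup */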
struct child {
pid_t pid;
int up_pipe[2];
int ctx_fd;
sig_atomic_t sigusr1;
sig_atomic_t sigusr2;
sig_atomic_t sigterm;
};
static struct child * children;
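/** start counting on the given perfmon context */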
static void perfmon_start_child(int ctx_fd)
{
if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
exit(EXIT_FAILURE);
}
}
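/** stop counting on the given perfmon context */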
static void perfmon_stop_child(int ctx_fd)
{
if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
exit(EXIT_FAILURE);
}
}
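/* the SIGUSR1/SIGUSR2 handlers only record a pending request for this child;
 * the actual start/stop is done from run_child()'s main loop */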
static void child_sigusr1(int val __attribute__((unused)))
{
size_t i;
for (i = 0; i < nr_cpus; ++i) {
if (children[i].pid == getpid()) {
children[i].sigusr1 = 1;
return;
}
}
}
static void child_sigusr2(int val __attribute__((unused)))
{
size_t i;
for (i = 0; i < nr_cpus; ++i) {
if (children[i].pid == getpid()) {
children[i].sigusr2 = 1;
return;
}
}
}
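/* forward an unexpected SIGTERM to the parent so the whole daemon is torn down */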
static void child_sigterm(int val __attribute__((unused)))
{
kill(getppid(), SIGTERM);
}
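/** pin the calling child to the given CPU so the system-wide context it loads monitors that CPU */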
static void set_affinity(size_t cpu)
{
cpu_set_t set;
int err;
CPU_ZERO(&set);
CPU_SET(cpu, &set);
err = sched_setaffinity(getpid(), sizeof(set), &set);
if (err == -1) {
perror("Failed to set affinity");
exit(EXIT_FAILURE);
}
}
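/** block SIGUSR1/SIGUSR2 and install the handlers; the signals are only
 * delivered inside sigsuspend() in run_child() */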
static void setup_signals(void)
{
struct sigaction act;
sigset_t mask;
sigemptyset(&mask);
sigaddset(&mask, SIGUSR1);
sigaddset(&mask, SIGUSR2);
sigprocmask(SIG_BLOCK, &mask, NULL);
act.sa_handler = child_sigusr1;
act.sa_flags = 0;
sigemptyset(&act.sa_mask);
if (sigaction(SIGUSR1, &act, NULL)) {
perror("oprofiled: install of SIGUSR1 handler failed");
exit(EXIT_FAILURE);
}
act.sa_handler = child_sigusr2;
act.sa_flags = 0;
sigemptyset(&act.sa_mask);
if (sigaction(SIGUSR2, &act, NULL)) {
perror("oprofiled: install of SIGUSR2 handler failed");
exit(EXIT_FAILURE);
}
act.sa_handler = child_sigterm;
act.sa_flags = 0;
sigemptyset(&act.sa_mask);
if (sigaction(SIGTERM, &act, NULL)) {
perror("oprofiled: install of SIGTERM handler failed");
exit(EXIT_FAILURE);
}
}
/** create the per-cpu context */
static void create_context(struct child * self)
{
pfarg_context_t ctx;
int err;
memset(&ctx, 0, sizeof(pfarg_context_t));
memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;
err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
if (err == -1) {
perror("CREATE_CONTEXT failed");
exit(EXIT_FAILURE);
}
self->ctx_fd = ctx.ctx_fd;
}
/** program the perfmon counters */
static void write_pmu(struct child * self)
{
pfarg_reg_t pc[OP_MAX_COUNTERS];
pfarg_reg_t pd[OP_MAX_COUNTERS];
int err;
size_t i;
memset(pc, 0, sizeof(pc));
memset(pd, 0, sizeof(pd));
#define PMC_GEN_INTERRUPT (1UL << 5)
#define PMC_PRIV_MONITOR (1UL << 6)
/* McKinley requires pmc4 to have bit 23 set (enable PMU).
* It is supposedly ignored in other pmc registers.
*/
#define PMC_MANDATORY (1UL << 23)
#define PMC_USER (1UL << 3)
#define PMC_KERNEL (1UL << 0)
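/* these positions follow the generic IA-64 PMC layout: privilege-level mask in
 * bits 0-3, overflow interrupt (oi) in bit 5, privileged monitor (pm) in bit 6,
 * event select in bits 8-15, unit mask in bits 16-19 */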
for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
struct opd_event * event = &opd_events[i];
pc[i].reg_num = event->counter + 4;
pc[i].reg_value = PMC_GEN_INTERRUPT;
pc[i].reg_value |= PMC_PRIV_MONITOR;
pc[i].reg_value |= PMC_MANDATORY;
if (event->user)
pc[i].reg_value |= PMC_USER;
else
pc[i].reg_value &= ~PMC_USER;
if (event->kernel)
pc[i].reg_value |= PMC_KERNEL;
else
pc[i].reg_value &= ~PMC_KERNEL;
pc[i].reg_value &= ~(0xffUL << 8);
pc[i].reg_value |= ((event->value & 0xffUL) << 8);
pc[i].reg_value &= ~(0xfUL << 16);
pc[i].reg_value |= ((event->um & 0xfUL) << 16);
pc[i].reg_smpl_eventid = event->counter;
}
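/* a PMD counts upwards and interrupts on overflow, so prime it with
 * -count (mod 2^64) to get an interrupt every 'count' events */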
for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
struct opd_event * event = &opd_events[i];
pd[i].reg_value = ~0UL - event->count + 1;
pd[i].reg_short_reset = ~0UL - event->count + 1;
pd[i].reg_num = event->counter + 4;
}
err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
if (err == -1) {
perror("Couldn't write PMCs");
exit(EXIT_FAILURE);
}
err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
if (err == -1) {
perror("Couldn't write PMDs");
exit(EXIT_FAILURE);
}
}
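/** attach the per-CPU context to this process so counting can be started */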
static void load_context(struct child * self)
{
pfarg_load_t load_args;
int err;
memset(&load_args, 0, sizeof(load_args));
load_args.load_pid = self->pid;
err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
if (err == -1) {
perror("Couldn't load context");
exit(EXIT_FAILURE);
}
}
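/** report this CPU's readiness to the parent through the up pipe */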
static void notify_parent(struct child * self, size_t cpu)
{
for (;;) {
ssize_t ret;
ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
if (ret == sizeof(size_t))
break;
if (ret < 0 && errno != EINTR) {
perror("Failed to write child pipe:");
exit(EXIT_FAILURE);
}
}
}
static struct child * inner_child;
void close_pipe(void)
{
close(inner_child->up_pipe[1]);
}
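/** body of a forked per-CPU child: pin to the CPU, program the PMU,
 * tell the parent we are up, then wait for start/stop signals forever */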
static void run_child(size_t cpu)
{
struct child * self = &children[cpu];
self->pid = getpid();
self->sigusr1 = 0;
self->sigusr2 = 0;
self->sigterm = 0;
inner_child = self;
if (atexit(close_pipe)) {
close_pipe();
exit(EXIT_FAILURE);
}
umask(0);
/* Change directory to allow directory to be removed */
if (chdir("/") < 0) {
perror("Unable to chdir to \"/\"");
exit(EXIT_FAILURE);
}
setup_signals();
set_affinity(cpu);
create_context(self);
write_pmu(self);
load_context(self);
notify_parent(self, cpu);
/* Redirect standard files to /dev/null */
freopen( "/dev/null", "r", stdin);
freopen( "/dev/null", "w", stdout);
freopen( "/dev/null", "w", stderr);
for (;;) {
sigset_t sigmask;
sigfillset(&sigmask);
sigdelset(&sigmask, SIGUSR1);
sigdelset(&sigmask, SIGUSR2);
sigdelset(&sigmask, SIGTERM);
if (self->sigusr1) {
perfmon_start_child(self->ctx_fd);
self->sigusr1 = 0;
}
if (self->sigusr2) {
perfmon_stop_child(self->ctx_fd);
self->sigusr2 = 0;
}
sigsuspend(&sigmask);
}
}
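/** block until the child signals over the pipe that its context is loaded */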
static void wait_for_child(struct child * child)
{
size_t tmp;
for (;;) {
ssize_t ret;
ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
if (ret == sizeof(size_t))
break;
if ((ret < 0 && errno != EINTR) || ret == 0) {
perror("Failed to read child pipe");
exit(EXIT_FAILURE);
}
}
printf("Perfmon child up on CPU%d\n", (int)tmp);
fflush(stdout);
close(child->up_pipe[0]);
}
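/* with Xen profiling enabled (!no_xen) a single in-process context is used
 * instead of per-CPU children */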
static struct child * xen_ctx;
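/** set up perfmon: either one in-process context for Xen, or one forked child per online CPU */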
void perfmon_init(void)
{
size_t i;
long nr;
if (cpu_type == CPU_TIMER_INT)
return;
if (!no_xen) {
xen_ctx = xmalloc(sizeof(struct child));
xen_ctx->pid = getpid();
xen_ctx->up_pipe[0] = -1;
xen_ctx->up_pipe[1] = -1;
xen_ctx->sigusr1 = 0;
xen_ctx->sigusr2 = 0;
xen_ctx->sigterm = 0;
create_context(xen_ctx);
write_pmu(xen_ctx);
load_context(xen_ctx);
return;
}
nr = sysconf(_SC_NPROCESSORS_ONLN);
if (nr == -1) {
fprintf(stderr, "Couldn't determine number of CPUs.\n");
exit(EXIT_FAILURE);
}
nr_cpus = nr;
children = xmalloc(sizeof(struct child) * nr_cpus);
memset(children, 0, sizeof(struct child) * nr_cpus);
for (i = 0; i < nr_cpus; ++i) {
int ret;
if (pipe(children[i].up_pipe)) {
perror("Couldn't create child pipe");
exit(EXIT_FAILURE);
}
ret = fork();
if (ret == -1) {
perror("Couldn't fork perfmon child");
exit(EXIT_FAILURE);
} else if (ret == 0) {
close(children[i].up_pipe[0]);
run_child(i);
} else {
children[i].pid = ret;
close(children[i].up_pipe[1]);
printf("Waiting on CPU%d\n", (int)i);
wait_for_child(&children[i]);
}
}
}
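/** kill and reap all perfmon children */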
void perfmon_exit(void)
{
size_t i;
if (cpu_type == CPU_TIMER_INT)
return;
if (!no_xen)
return;
for (i = 0; i < nr_cpus; ++i) {
if (children[i].pid) {
int c_pid = children[i].pid;
children[i].pid = 0;
if (kill(c_pid, SIGKILL) == 0)
waitpid(c_pid, NULL, 0);
}
}
}
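/** start counting on every CPU (children are told via SIGUSR1) */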
void perfmon_start(void)
{
size_t i;
if (cpu_type == CPU_TIMER_INT)
return;
if (!no_xen) {
perfmon_start_child(xen_ctx->ctx_fd);
return;
}
for (i = 0; i < nr_cpus; ++i) {
if (kill(children[i].pid, SIGUSR1)) {
perror("Unable to start perfmon");
exit(EXIT_FAILURE);
}
}
}
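/** stop counting on every CPU (children are told via SIGUSR2) */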
void perfmon_stop(void)
{
size_t i;
if (cpu_type == CPU_TIMER_INT)
return;
if (!no_xen) {
perfmon_stop_child(xen_ctx->ctx_fd);
return;
}
for (i = 0; i < nr_cpus; ++i)
if (kill(children[i].pid, SIGUSR2)) {
perror("Unable to stop perfmon");
exit(EXIT_FAILURE);
}
}
#endif /* __ia64__ */