/** * @file op_syscalls.c * Tracing of system calls * * @remark Copyright 2002 OProfile authors * @remark Read the file COPYING * * @author Bob Montgomery * @author Will Cohen * @author John Levon * @author Philippe Elie */ #include <linux/sched.h> #include <linux/unistd.h> #include <linux/mman.h> #include <linux/file.h> #include "oprofile.h" #include "op_dcache.h" #include "op_util.h" uint dname_top; struct qstr **dname_stack; char * pool_pos; char * pool_start; char * pool_end; void oprof_put_note(struct op_note * samp); /* ------------ system calls --------------- */ struct mmap_arg_struct { unsigned long addr; unsigned long len; unsigned long prot; unsigned long flags; unsigned long fd; unsigned long offset; }; /* --------- IA64 versions of system calls ------ */ asmlinkage static int (*old_sys_clone)(long, long); asmlinkage static int (*old_sys_clone2)(long, long, long); asmlinkage static int (*old_sys_execve)(char *, char **, char **); asmlinkage static unsigned long (*old_sys_mmap)(unsigned long, unsigned long, int, int, int, long); asmlinkage static unsigned long (*old_sys_mmap2)(unsigned long, unsigned long, int, int, int, long); asmlinkage static long (*old_sys_init_module)(char const *, struct module *); asmlinkage static long (*old_sys_exit)(int); /* --------- declarations of interception stubs for IA64 ------ */ asmlinkage long post_stub_clone(long, long); asmlinkage long post_stub_clone2(long, long, long); asmlinkage long my_ia64_execve(char *, char **, char **); asmlinkage unsigned long post_stub_mmap(unsigned long, unsigned long, int, int, int, long); asmlinkage unsigned long post_stub_mmap2(unsigned long, unsigned long, int, int, int, long); asmlinkage long post_stub_init_module(char const *, struct module *); asmlinkage long pre_stub_exit(int); /* IA64 system call table doesn't use function pointers, it uses * pointers to code (not the same thing). Basically it can violate the * procedure calling rules because these "procedure calls" are made by * the assembly language BREAK handler in ivt.S. */ struct fdesc { void * ip; void * gp; }; struct fdesc fdesc_clone; struct fdesc fdesc_clone2; struct fdesc fdesc_execve; struct fdesc fdesc_mmap; struct fdesc fdesc_mmap2; struct fdesc fdesc_init_module; struct fdesc fdesc_exit; /* ----------- End of IA64 weirdness for now -------------- */ spinlock_t map_lock = SPIN_LOCK_UNLOCKED; /* called with map_lock held */ static void oprof_output_map(ulong addr, ulong len, ulong offset, struct file * file, int is_execve) { struct op_note note; /* don't bother with /dev/zero mappings etc. */ if (!len) return; note.pid = current->pid; note.tgid = op_get_tgid(); note.addr = addr; note.len = len; note.offset = offset; note.type = is_execve ? OP_EXEC : OP_MAP; note.hash = hash_path(file); if (note.hash == -1) return; oprof_put_note(¬e); } static int oprof_output_maps(struct task_struct * task) { int size=0; struct mm_struct * mm; struct vm_area_struct * map; /* we don't need to worry about mm_users here, since there is at least one user (current), and if there's other code using this mm, then mm_users must be at least 2; we should never have to mmput() here. */ if (!(mm = task->mm)) goto out; lock_mmap(mm); spin_lock(&map_lock); /* We need two pass, daemon assume than the first mmap notification * is for the executable but some process doesn't follow this model. */ for (map = mm->mmap; map; map = map->vm_next) { if (!(map->vm_flags & VM_EXEC) || !map->vm_file) continue; if (!(map->vm_flags & VM_EXECUTABLE)) continue; oprof_output_map(map->vm_start, map->vm_end-map->vm_start, GET_VM_OFFSET(map), map->vm_file, 1); } for (map = mm->mmap; map; map = map->vm_next) { if (!(map->vm_flags & VM_EXEC) || !map->vm_file) continue; if (map->vm_flags & VM_EXECUTABLE) continue; oprof_output_map(map->vm_start, map->vm_end-map->vm_start, GET_VM_OFFSET(map), map->vm_file, 0); } spin_unlock(&map_lock); unlock_mmap(mm); out: return size; } /* execve is a special case on IA64. The others get the result and * arguments after the system call has been made from the ASM stub. */ asmlinkage long my_sys_execve (char * filename, char **argv, char **envp, struct pt_regs * regs) { int error; MOD_INC_USE_COUNT; filename = getname(filename); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; error = do_execve(filename, argv, envp, regs); if (!error) { PTRACE_OFF(current); oprof_output_maps(current); } putname(filename); out: unlock_execve(); MOD_DEC_USE_COUNT; return error; } static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags, ulong fd, ulong offset) { struct file * file; lock_out_mmap(); file = fget(fd); if (!file) goto out; spin_lock(&map_lock); oprof_output_map(addr, len, offset, file, 0); spin_unlock(&map_lock); fput(file); out: unlock_out_mmap(); } /* * IA64 mmap routines: * The post_sys_* routines are called after the syscall has been made. * The first argument is the return value from the system call. */ asmlinkage void post_sys_mmap2(ulong ret, ulong addr, ulong len, ulong prot, ulong flags, ulong fd, ulong pgoff) { /* FIXME: This should be done in the ASM stub. */ MOD_INC_USE_COUNT; if ((prot & PROT_EXEC) && ret >= 0) out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT); goto out; out: MOD_DEC_USE_COUNT; } asmlinkage void post_sys_mmap(ulong ret, ulong addr, ulong len, ulong prot, ulong flags, ulong fd, ulong off) { /* FIXME: This should be done in the ASM stub. */ MOD_INC_USE_COUNT; if ((prot & PROT_EXEC) && ret >= 0) out_mmap(ret, len, prot, flags, fd, off); goto out; out: MOD_DEC_USE_COUNT; } inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid) { struct op_note note; note.type = OP_FORK; note.pid = old_pid; note.tgid = old_tgid; note.addr = new_pid; note.len = new_tgid; oprof_put_note(¬e); } asmlinkage void post_sys_clone(long ret, long arg0, long arg1) { u32 pid = current->pid; u32 tgid = op_get_tgid(); /* FIXME: This should be done in the ASM stub. */ MOD_INC_USE_COUNT; if (ret) /* FIXME: my libc show clone() is not implemented in ia64 * but used only by fork() with a SIGCHILD first parameter * so we assume it's a fork */ oprof_report_fork(pid, ret, pid, tgid); MOD_DEC_USE_COUNT; } asmlinkage void post_sys_clone2(long ret, long arg0, long arg1, long arg2) { u32 pid = current->pid; u32 tgid = op_get_tgid(); long clone_flags = arg0; /* FIXME: This should be done in the ASM stub. */ MOD_INC_USE_COUNT; if (ret) { if (clone_flags & CLONE_THREAD) oprof_report_fork(pid, ret, tgid, tgid); else oprof_report_fork(pid, ret, tgid, ret); } MOD_DEC_USE_COUNT; } asmlinkage void post_sys_init_module(long ret, char const * name_user, struct module * mod_user) { /* FIXME: This should be done in the ASM stub. */ MOD_INC_USE_COUNT; if (ret >= 0) { struct op_note note; note.type = OP_DROP_MODULES; oprof_put_note(¬e); } MOD_DEC_USE_COUNT; } /* Exit must use a pre-call intercept stub. There is no post exit. */ asmlinkage void pre_sys_exit(int error_code) { struct op_note note; MOD_INC_USE_COUNT; note.addr = current->times.tms_utime; note.len = current->times.tms_stime; note.offset = current->start_time; note.type = OP_EXIT; note.pid = current->pid; note.tgid = op_get_tgid(); oprof_put_note(¬e); /* this looks UP-dangerous, as the exit sleeps and we don't * have a use count, but in fact its ok as sys_exit is noreturn, * so we can never come back to this non-existent exec page */ MOD_DEC_USE_COUNT; } extern void * sys_call_table[]; /* FIXME: Now that I'm never trying to do a C-level call through these * pointers, I should just save, intercept, and restore with void * * instead of the void * part of the function descriptor, I think. */ void op_save_syscalls(void) { fdesc_clone.ip = sys_call_table[__NR_clone - __NR_ni_syscall]; old_sys_clone = (void *)&fdesc_clone; fdesc_clone2.ip = sys_call_table[__NR_clone2 - __NR_ni_syscall]; old_sys_clone2 = (void *)&fdesc_clone2; fdesc_execve.ip = sys_call_table[__NR_execve - __NR_ni_syscall]; old_sys_execve = (void *)&fdesc_execve; fdesc_mmap.ip = sys_call_table[__NR_mmap - __NR_ni_syscall]; old_sys_mmap = (void *)&fdesc_mmap; fdesc_mmap2.ip = sys_call_table[__NR_mmap2 - __NR_ni_syscall]; old_sys_mmap2 = (void *)&fdesc_mmap2; fdesc_init_module.ip = sys_call_table[__NR_init_module - __NR_ni_syscall]; old_sys_init_module = (void *)&fdesc_init_module; fdesc_exit.ip = sys_call_table[__NR_exit - __NR_ni_syscall]; old_sys_exit = (void *)&fdesc_exit; } void op_intercept_syscalls(void) { /* Must extract the function address from the stub function * descriptors. */ sys_call_table[__NR_clone - __NR_ni_syscall] = ((struct fdesc *)post_stub_clone)->ip; sys_call_table[__NR_clone2 - __NR_ni_syscall] = ((struct fdesc *)post_stub_clone2)->ip; sys_call_table[__NR_execve - __NR_ni_syscall] = ((struct fdesc *)my_ia64_execve)->ip; sys_call_table[__NR_mmap - __NR_ni_syscall] = ((struct fdesc *)post_stub_mmap)->ip; sys_call_table[__NR_mmap2 - __NR_ni_syscall] = ((struct fdesc *)post_stub_mmap2)->ip; sys_call_table[__NR_init_module - __NR_ni_syscall] = ((struct fdesc *)post_stub_init_module)->ip; sys_call_table[__NR_exit - __NR_ni_syscall] = ((struct fdesc *)pre_stub_exit)->ip; } void op_restore_syscalls(void) { sys_call_table[__NR_clone - __NR_ni_syscall] = ((struct fdesc *)old_sys_clone)->ip; sys_call_table[__NR_clone2 - __NR_ni_syscall] = ((struct fdesc *)old_sys_clone2)->ip; sys_call_table[__NR_execve - __NR_ni_syscall] = ((struct fdesc *)old_sys_execve)->ip; sys_call_table[__NR_mmap - __NR_ni_syscall] = ((struct fdesc *)old_sys_mmap)->ip; sys_call_table[__NR_mmap2 - __NR_ni_syscall] = ((struct fdesc *)old_sys_mmap2)->ip; sys_call_table[__NR_init_module - __NR_ni_syscall] = ((struct fdesc *)old_sys_init_module)->ip; sys_call_table[__NR_exit - __NR_ni_syscall] = ((struct fdesc *)old_sys_exit)->ip; }