/** * @file op_syscalls.c * Tracing of system calls * * @remark Copyright 2002 OProfile authors * @remark Read the file COPYING * * @author John Levon * @author Philippe Elie */ #include <linux/sched.h> #include <linux/unistd.h> #include <linux/mman.h> #include <linux/file.h> #include "oprofile.h" void oprof_put_note(struct op_note * samp); void __oprof_put_note(struct op_note * samp); extern spinlock_t note_lock; /* ------------ system calls --------------- */ struct mmap_arg_struct { unsigned long addr; unsigned long len; unsigned long prot; unsigned long flags; unsigned long fd; unsigned long offset; }; asmlinkage static int (*old_sys_fork)(struct pt_regs); asmlinkage static int (*old_sys_vfork)(struct pt_regs); asmlinkage static int (*old_sys_clone)(struct pt_regs); asmlinkage static int (*old_sys_execve)(struct pt_regs); asmlinkage static int (*old_old_mmap)(struct mmap_arg_struct *); #ifdef HAVE_MMAP2 asmlinkage static long (*old_sys_mmap2)(ulong, ulong, ulong, ulong, ulong, ulong); #endif asmlinkage static long (*old_sys_init_module)(char const *, struct module *); asmlinkage static long (*old_sys_exit)(int); /* called with note_lock held */ static void oprof_output_map(ulong addr, ulong len, ulong offset, struct file * file, int is_execve) { struct op_note note; /* don't bother with /dev/zero mappings etc. */ if (!len) return; note.pid = current->pid; note.tgid = op_get_tgid(); note.addr = addr; note.len = len; note.offset = offset; note.type = is_execve ? OP_EXEC : OP_MAP; note.hash = hash_path(file); if (note.hash == -1) return; /* holding note lock */ __oprof_put_note(¬e); } static int oprof_output_maps(struct task_struct * task) { int size=0; struct mm_struct * mm; struct vm_area_struct * map; /* we don't need to worry about mm_users here, since there is at least one user (current), and if there's other code using this mm, then mm_users must be at least 2; we should never have to mmput() here. */ if (!(mm = task->mm)) goto out; lock_mmap(mm); spin_lock(¬e_lock); /* We need two pass, daemon assume than the first mmap notification * is for the executable but some process doesn't follow this model. */ for (map = mm->mmap; map; map = map->vm_next) { if (!(map->vm_flags & VM_EXEC) || !map->vm_file) continue; if (!(map->vm_flags & VM_EXECUTABLE)) continue; oprof_output_map(map->vm_start, map->vm_end-map->vm_start, GET_VM_OFFSET(map), map->vm_file, 1); } for (map = mm->mmap; map; map = map->vm_next) { if (!(map->vm_flags & VM_EXEC) || !map->vm_file) continue; if (map->vm_flags & VM_EXECUTABLE) continue; oprof_output_map(map->vm_start, map->vm_end-map->vm_start, GET_VM_OFFSET(map), map->vm_file, 0); } spin_unlock(¬e_lock); unlock_mmap(mm); out: return size; } asmlinkage static int my_sys_execve(struct pt_regs regs) { char * filename; int ret; MOD_INC_USE_COUNT; lock_execve(); filename = getname((char *)regs.ebx); if (IS_ERR(filename)) { ret = PTR_ERR(filename); goto out; } ret = do_execve(filename, (char **)regs.ecx, (char **)regs.edx, ®s); if (!ret) { PTRACE_OFF(current); oprof_output_maps(current); } putname(filename); out: unlock_execve(); MOD_DEC_USE_COUNT; return ret; } static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags, ulong fd, ulong offset) { struct file * file; lock_out_mmap(); file = fget(fd); if (!file) goto out; spin_lock(¬e_lock); oprof_output_map(addr, len, offset, file, 0); spin_unlock(¬e_lock); fput(file); out: unlock_out_mmap(); } #ifdef HAVE_MMAP2 asmlinkage static int my_sys_mmap2(ulong addr, ulong len, ulong prot, ulong flags, ulong fd, ulong pgoff) { int ret; MOD_INC_USE_COUNT; ret = old_sys_mmap2(addr, len, prot, flags, fd, pgoff); if ((prot & PROT_EXEC) && ret >= 0) out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT); MOD_DEC_USE_COUNT; return ret; } #endif asmlinkage static int my_old_mmap(struct mmap_arg_struct * arg) { int ret; MOD_INC_USE_COUNT; ret = old_old_mmap(arg); if (ret >= 0) { struct mmap_arg_struct a; if (copy_from_user(&a, arg, sizeof(a))) { ret = -EFAULT; goto out; } if (a.prot&PROT_EXEC) out_mmap(ret, a.len, a.prot, a.flags, a.fd, a.offset); } out: MOD_DEC_USE_COUNT; return ret; } inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid) { struct op_note note; note.type = OP_FORK; note.pid = old_pid; note.tgid = old_tgid; note.addr = new_pid; note.len = new_tgid; oprof_put_note(¬e); } asmlinkage static int my_sys_fork(struct pt_regs regs) { u32 pid = current->pid; u32 tgid = op_get_tgid(); int ret; MOD_INC_USE_COUNT; ret = old_sys_fork(regs); if (ret) oprof_report_fork(pid, ret, tgid, ret); MOD_DEC_USE_COUNT; return ret; } asmlinkage static int my_sys_vfork(struct pt_regs regs) { u32 pid = current->pid; u32 tgid = op_get_tgid(); int ret; MOD_INC_USE_COUNT; ret = old_sys_vfork(regs); if (ret) oprof_report_fork(pid, ret, tgid, ret); MOD_DEC_USE_COUNT; return ret; } asmlinkage static int my_sys_clone(struct pt_regs regs) { u32 pid = current->pid; u32 tgid = op_get_tgid(); #if V_AT_LEAST(2, 4, 0) u32 clone_flags = regs.ebx; #endif int ret; MOD_INC_USE_COUNT; ret = old_sys_clone(regs); if (ret) { #if V_AT_LEAST(2, 4, 0) if (clone_flags & CLONE_THREAD) oprof_report_fork(pid, ret, tgid, tgid); else #endif oprof_report_fork(pid, ret, tgid, ret); } MOD_DEC_USE_COUNT; return ret; } asmlinkage static long my_sys_init_module(char const * name_user, struct module * mod_user) { long ret; MOD_INC_USE_COUNT; ret = old_sys_init_module(name_user, mod_user); if (ret >= 0) { struct op_note note; note.type = OP_DROP_MODULES; oprof_put_note(¬e); } MOD_DEC_USE_COUNT; return ret; } /* used from do_nmi */ asmlinkage long my_sys_exit(int error_code) { struct op_note note; MOD_INC_USE_COUNT; note.type = OP_EXIT; note.pid = current->pid; note.tgid = op_get_tgid(); oprof_put_note(¬e); /* this looks UP-dangerous, as the exit sleeps and we don't * have a use count, but in fact its ok as sys_exit is noreturn, * so we can never come back to this non-existent exec page */ MOD_DEC_USE_COUNT; return old_sys_exit(error_code); } extern void * sys_call_table[]; void op_save_syscalls(void) { old_sys_fork = sys_call_table[__NR_fork]; old_sys_vfork = sys_call_table[__NR_vfork]; old_sys_clone = sys_call_table[__NR_clone]; old_sys_execve = sys_call_table[__NR_execve]; old_old_mmap = sys_call_table[__NR_mmap]; #ifdef HAVE_MMAP2 old_sys_mmap2 = sys_call_table[__NR_mmap2]; #endif old_sys_init_module = sys_call_table[__NR_init_module]; old_sys_exit = sys_call_table[__NR_exit]; } void op_intercept_syscalls(void) { sys_call_table[__NR_fork] = my_sys_fork; sys_call_table[__NR_vfork] = my_sys_vfork; sys_call_table[__NR_clone] = my_sys_clone; sys_call_table[__NR_execve] = my_sys_execve; sys_call_table[__NR_mmap] = my_old_mmap; #ifdef HAVE_MMAP2 sys_call_table[__NR_mmap2] = my_sys_mmap2; #endif sys_call_table[__NR_init_module] = my_sys_init_module; sys_call_table[__NR_exit] = my_sys_exit; } void op_restore_syscalls(void) { sys_call_table[__NR_fork] = old_sys_fork; sys_call_table[__NR_vfork] = old_sys_vfork; sys_call_table[__NR_clone] = old_sys_clone; sys_call_table[__NR_execve] = old_sys_execve; sys_call_table[__NR_mmap] = old_old_mmap; #ifdef HAVE_MMAP2 sys_call_table[__NR_mmap2] = old_sys_mmap2; #endif sys_call_table[__NR_init_module] = old_sys_init_module; sys_call_table[__NR_exit] = old_sys_exit; }