/**
* @file op_syscalls.c
* Tracing of system calls
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
* @author Philippe Elie
*/
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/mman.h>
#include <linux/file.h>
#include "oprofile.h"
/* Note-emitting entry point; called in this file without note_lock held. */
void oprof_put_note(struct op_note * samp);
/* Note-emitting entry point; called in this file only while note_lock is held. */
void __oprof_put_note(struct op_note * samp);
/* Spinlock taken in this file around calls that end in __oprof_put_note(). */
extern spinlock_t note_lock;
/* ------------ system calls --------------- */
/*
 * Argument block for the old-style mmap entry point: my_old_mmap() receives
 * a user-space pointer to one of these and copies it in with copy_from_user().
 */
struct mmap_arg_struct {
unsigned long addr;
unsigned long len;
unsigned long prot;
unsigned long flags;
unsigned long fd;
unsigned long offset;
};
/*
 * Original system call handlers. They are filled in from sys_call_table by
 * op_save_syscalls() and written back by op_restore_syscalls(); the my_*
 * replacements chain to them. old_sys_execve is saved and restored only:
 * my_sys_execve() calls do_execve() directly instead of chaining.
 */
asmlinkage static int (*old_sys_fork)(struct pt_regs);
asmlinkage static int (*old_sys_vfork)(struct pt_regs);
asmlinkage static int (*old_sys_clone)(struct pt_regs);
asmlinkage static int (*old_sys_execve)(struct pt_regs);
asmlinkage static int (*old_old_mmap)(struct mmap_arg_struct *);
#ifdef HAVE_MMAP2
asmlinkage static long (*old_sys_mmap2)(ulong, ulong, ulong, ulong, ulong, ulong);
#endif
asmlinkage static long (*old_sys_init_module)(char const *, struct module *);
asmlinkage static long (*old_sys_exit)(int);
/**
 * Emit one mapping note for the current task.
 *
 * Must be called with note_lock held: the note is handed to
 * __oprof_put_note(), which is invoked here with the lock already taken.
 *
 * @param addr       start address of the mapping
 * @param len        length of the mapping; a zero length is ignored
 * @param offset     offset value stored in the note
 * @param file       file backing the mapping, passed to hash_path()
 * @param is_execve  non-zero emits an OP_EXEC note, zero an OP_MAP note
 *
 * Nothing is emitted when hash_path() returns -1.
 */
static void oprof_output_map(ulong addr, ulong len, ulong offset,
	struct file * file, int is_execve)
{
	struct op_note note;

	/* don't bother with /dev/zero mappings etc. */
	if (!len)
		return;

	note.pid = current->pid;
	note.tgid = op_get_tgid();
	note.addr = addr;
	note.len = len;
	note.offset = offset;
	note.type = is_execve ? OP_EXEC : OP_MAP;
	note.hash = hash_path(file);
	if (note.hash == -1)
		return;

	/* holding note lock */
	__oprof_put_note(&note);
}
static int oprof_output_maps(struct task_struct * task)
{
int size=0;
struct mm_struct * mm;
struct vm_area_struct * map;
/* we don't need to worry about mm_users here, since there is at
least one user (current), and if there's other code using this
mm, then mm_users must be at least 2; we should never have to
mmput() here. */
if (!(mm = task->mm))
goto out;
lock_mmap(mm);
spin_lock(¬e_lock);
/* We need two pass, daemon assume than the first mmap notification
* is for the executable but some process doesn't follow this model.
*/
for (map = mm->mmap; map; map = map->vm_next) {
if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
continue;
if (!(map->vm_flags & VM_EXECUTABLE))
continue;
oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
GET_VM_OFFSET(map), map->vm_file, 1);
}
for (map = mm->mmap; map; map = map->vm_next) {
if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
continue;
if (map->vm_flags & VM_EXECUTABLE)
continue;
oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
GET_VM_OFFSET(map), map->vm_file, 0);
}
spin_unlock(¬e_lock);
unlock_mmap(mm);
out:
return size;
}
/**
 * Replacement execve handler.
 *
 * Does not chain to old_sys_execve: it fetches the filename itself, calls
 * do_execve() directly, and on success reports the new image's mappings
 * via oprof_output_maps(). The whole operation runs between
 * lock_execve()/unlock_execve() and holds a module use count.
 *
 * @param regs  saved user registers; ebx, ecx and edx carry the filename,
 *              argv and envp user pointers
 * @return 0 on success, otherwise the error from getname() or do_execve()
 */
asmlinkage static int my_sys_execve(struct pt_regs regs)
{
	char * filename;
	int ret;

	MOD_INC_USE_COUNT;

	lock_execve();

	filename = getname((char *)regs.ebx);
	if (IS_ERR(filename)) {
		ret = PTR_ERR(filename);
		goto out;
	}

	ret = do_execve(filename, (char **)regs.ecx, (char **)regs.edx, &regs);

	if (!ret) {
		/* NOTE(review): PTRACE_OFF() is a project macro; presumably it
		 * mirrors what the stock sys_execve does after a successful
		 * exec - confirm against its definition. */
		PTRACE_OFF(current);
		oprof_output_maps(current);
	}

	putname(filename);

out:
	unlock_execve();
	MOD_DEC_USE_COUNT;
	return ret;
}
/**
 * Report a freshly created file mapping of the current task.
 *
 * Looks up the file behind @p fd and, if there is one, emits a map note
 * under note_lock. The work is bracketed by lock_out_mmap() and
 * unlock_out_mmap().
 *
 * @param addr    address the mapping was placed at
 * @param len     length of the mapping
 * @param prot    protection bits of the request (not used here)
 * @param flags   mapping flags of the request (not used here)
 * @param fd      file descriptor that was mapped; if fget() finds no file
 *                for it, nothing is reported
 * @param offset  offset value stored in the note
 */
static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags, ulong fd,
	ulong offset)
{
	struct file * file;

	lock_out_mmap();

	file = fget(fd);
	if (!file)
		goto out;

	spin_lock(&note_lock);
	oprof_output_map(addr, len, offset, file, 0);
	spin_unlock(&note_lock);

	/* drop the reference taken by fget() */
	fput(file);

out:
	unlock_out_mmap();
}
#ifdef HAVE_MMAP2
/**
 * Replacement mmap2 handler.
 *
 * Chains to the saved handler and, when an executable mapping was created,
 * reports it through out_mmap() with the page offset converted to a byte
 * offset.
 *
 * @return whatever the saved handler returned (mapping address or a
 *         negative errno)
 */
asmlinkage static int my_sys_mmap2(ulong addr, ulong len,
	ulong prot, ulong flags, ulong fd, ulong pgoff)
{
	long ret;

	MOD_INC_USE_COUNT;

	ret = old_sys_mmap2(addr, len, prot, flags, fd, pgoff);

	/*
	 * The result is an address, not a signed count: a mapping placed in
	 * the upper half of the address space looks negative, so a ">= 0"
	 * test would silently drop it. Only the small errno range at the
	 * very top means failure.
	 */
	if ((prot & PROT_EXEC) && !IS_ERR((void *)ret))
		out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT);

	MOD_DEC_USE_COUNT;
	return ret;
}
#endif
/**
 * Replacement handler for the old-style mmap, whose single argument is a
 * user pointer to a struct mmap_arg_struct.
 *
 * Chains to the saved handler first. On success the argument block is
 * copied in again to learn the protection, length, fd and offset, and
 * executable mappings are reported through out_mmap().
 *
 * @param arg  user-space pointer to the argument block
 * @return whatever the saved handler returned (mapping address or a
 *         negative errno)
 */
asmlinkage static int my_old_mmap(struct mmap_arg_struct * arg)
{
	struct mmap_arg_struct a;
	int ret;

	MOD_INC_USE_COUNT;

	ret = old_old_mmap(arg);

	/*
	 * The result is an address: mappings in the upper half of the
	 * address space look negative, so test for the errno range rather
	 * than for ">= 0".
	 */
	if (IS_ERR((void *)(long)ret))
		goto out;

	/*
	 * The mapping already exists at this point. If the argument block
	 * can no longer be read we only lose the profiling note; the caller
	 * must still get the address of its mapping, not -EFAULT.
	 */
	if (copy_from_user(&a, arg, sizeof(a)))
		goto out;

	if (a.prot & PROT_EXEC)
		out_mmap(ret, a.len, a.prot, a.flags, a.fd, a.offset);

out:
	MOD_DEC_USE_COUNT;
	return ret;
}
/**
 * Emit an OP_FORK note.
 *
 * The parent's ids go into the note's pid/tgid fields; the child's pid and
 * tgid are carried in the addr and len fields respectively.
 *
 * @param old_pid   pid of the forking task
 * @param new_pid   pid of the new task
 * @param old_tgid  tgid of the forking task
 * @param new_tgid  tgid of the new task
 */
inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid)
{
	struct op_note note;

	note.type = OP_FORK;
	note.pid = old_pid;
	note.tgid = old_tgid;
	note.addr = new_pid;
	note.len = new_tgid;
	oprof_put_note(&note);
}
/**
 * Replacement fork handler.
 *
 * Chains to the saved handler and reports the new task; the child is
 * reported with its own pid as tgid.
 *
 * @param regs  saved user registers, passed through to the saved handler
 * @return whatever the saved handler returned
 */
asmlinkage static int my_sys_fork(struct pt_regs regs)
{
	u32 pid = current->pid;
	u32 tgid = op_get_tgid();
	int ret;

	MOD_INC_USE_COUNT;

	ret = old_sys_fork(regs);

	/* a negative value is an errno: no child exists, so report nothing */
	if (ret > 0)
		oprof_report_fork(pid, ret, tgid, ret);

	MOD_DEC_USE_COUNT;
	return ret;
}
/**
 * Replacement vfork handler.
 *
 * Chains to the saved handler and reports the new task; the child is
 * reported with its own pid as tgid.
 *
 * @param regs  saved user registers, passed through to the saved handler
 * @return whatever the saved handler returned
 */
asmlinkage static int my_sys_vfork(struct pt_regs regs)
{
	u32 pid = current->pid;
	u32 tgid = op_get_tgid();
	int ret;

	MOD_INC_USE_COUNT;

	ret = old_sys_vfork(regs);

	/* a negative value is an errno: no child exists, so report nothing */
	if (ret > 0)
		oprof_report_fork(pid, ret, tgid, ret);

	MOD_DEC_USE_COUNT;
	return ret;
}
/**
 * Replacement clone handler.
 *
 * Chains to the saved handler and reports the new task. On kernels where
 * V_AT_LEAST(2, 4, 0) holds, a CLONE_THREAD clone is reported with the
 * parent's tgid; otherwise the child's pid doubles as its tgid.
 *
 * @param regs  saved user registers; ebx carries the clone flags
 * @return whatever the saved handler returned
 */
asmlinkage static int my_sys_clone(struct pt_regs regs)
{
	u32 pid = current->pid;
	u32 tgid = op_get_tgid();
#if V_AT_LEAST(2, 4, 0)
	/* read the flags before chaining to the saved handler */
	u32 clone_flags = regs.ebx;
#endif
	u32 new_tgid;
	int ret;

	MOD_INC_USE_COUNT;

	ret = old_sys_clone(regs);

	/* a negative value is an errno: no child exists, so report nothing */
	if (ret > 0) {
		new_tgid = ret;
#if V_AT_LEAST(2, 4, 0)
		if (clone_flags & CLONE_THREAD)
			new_tgid = tgid;
#endif
		oprof_report_fork(pid, ret, tgid, new_tgid);
	}

	MOD_DEC_USE_COUNT;
	return ret;
}
/**
 * Replacement init_module handler.
 *
 * Chains to the saved handler and, when it did not fail, emits an
 * OP_DROP_MODULES note. Only the note's type field is filled in.
 *
 * @param name_user  passed through to the saved handler
 * @param mod_user   passed through to the saved handler
 * @return whatever the saved handler returned
 */
asmlinkage static long my_sys_init_module(char const * name_user, struct module * mod_user)
{
	long ret;

	MOD_INC_USE_COUNT;

	ret = old_sys_init_module(name_user, mod_user);

	if (ret >= 0) {
		struct op_note note;

		note.type = OP_DROP_MODULES;
		oprof_put_note(&note);
	}

	MOD_DEC_USE_COUNT;
	return ret;
}
/**
 * Replacement exit handler; emits an OP_EXIT note for the current task and
 * then chains to the saved handler.
 *
 * Not static: used from do_nmi.
 *
 * @param error_code  passed through to the saved handler
 * @return the saved handler's return value (see the comment in the body:
 *         sys_exit is not expected to return)
 */
asmlinkage long my_sys_exit(int error_code)
{
	struct op_note note;

	MOD_INC_USE_COUNT;

	note.type = OP_EXIT;
	note.pid = current->pid;
	note.tgid = op_get_tgid();
	oprof_put_note(&note);

	/* this looks UP-dangerous, as the exit sleeps and we don't
	 * have a use count, but in fact its ok as sys_exit is noreturn,
	 * so we can never come back to this non-existent exec page
	 */
	MOD_DEC_USE_COUNT;
	return old_sys_exit(error_code);
}
/* System call table, indexed by __NR_* numbers; its entries are read and overwritten by the op_*_syscalls() functions below. */
extern void * sys_call_table[];
/**
 * Remember the handlers currently installed in sys_call_table for every
 * system call this file replaces, so that the my_* handlers can chain to
 * them and op_restore_syscalls() can put them back.
 */
void op_save_syscalls(void)
{
old_sys_fork = sys_call_table[__NR_fork];
old_sys_vfork = sys_call_table[__NR_vfork];
old_sys_clone = sys_call_table[__NR_clone];
old_sys_execve = sys_call_table[__NR_execve];
old_old_mmap = sys_call_table[__NR_mmap];
#ifdef HAVE_MMAP2
old_sys_mmap2 = sys_call_table[__NR_mmap2];
#endif
old_sys_init_module = sys_call_table[__NR_init_module];
old_sys_exit = sys_call_table[__NR_exit];
}
/**
 * Install the my_* handlers into sys_call_table. The replacements call
 * through the old_* pointers, which are filled in by op_save_syscalls().
 */
void op_intercept_syscalls(void)
{
sys_call_table[__NR_fork] = my_sys_fork;
sys_call_table[__NR_vfork] = my_sys_vfork;
sys_call_table[__NR_clone] = my_sys_clone;
sys_call_table[__NR_execve] = my_sys_execve;
sys_call_table[__NR_mmap] = my_old_mmap;
#ifdef HAVE_MMAP2
sys_call_table[__NR_mmap2] = my_sys_mmap2;
#endif
sys_call_table[__NR_init_module] = my_sys_init_module;
sys_call_table[__NR_exit] = my_sys_exit;
}
/**
 * Write the handlers remembered in the old_* pointers back into
 * sys_call_table, undoing op_intercept_syscalls().
 */
void op_restore_syscalls(void)
{
sys_call_table[__NR_fork] = old_sys_fork;
sys_call_table[__NR_vfork] = old_sys_vfork;
sys_call_table[__NR_clone] = old_sys_clone;
sys_call_table[__NR_execve] = old_sys_execve;
sys_call_table[__NR_mmap] = old_old_mmap;
#ifdef HAVE_MMAP2
sys_call_table[__NR_mmap2] = old_sys_mmap2;
#endif
sys_call_table[__NR_init_module] = old_sys_init_module;
sys_call_table[__NR_exit] = old_sys_exit;
}