qostrace: low-overhead recording of system call entry/exit on x86

Adds CONFIG_QOSTRACE, the TIF_SYSCALL_QOSTRACE thread flag, hooks in
syscall_trace_enter()/syscall_trace_leave(), and a "qostrace" misc device.
Writes to the device select which syscall numbers and which tasks are
traced; reads return queued records of four u32s (event, pid, tv_sec,
tv_nsec).

NOTE(review): the header names in the first ptrace.c hunk and the exact
whitespace of all context lines were missing from the copy this patch was
restored from; they are a best-effort reconstruction.  Verify against the
2.6.29 tree, or apply with "patch -l".

--- linux-2.6.29-gs3.2-bfq/arch/x86/Kconfig.debug	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29-gs3.2-bfq-qtrace/arch/x86/Kconfig.debug	2009-10-05 15:44:58.766288048 +0200
@@ -5,6 +5,12 @@ config TRACE_IRQFLAGS_SUPPORT
 
 source "lib/Kconfig.debug"
 
+config QOSTRACE
+	bool "Low overhead profiling of system calls"
+	default y
+	---help---
+	  Use this only if you know what it means.
+
 config STRICT_DEVMEM
 	bool "Filter access to /dev/mem"
 	help
--- linux-2.6.29-gs3.2-bfq/arch/x86/include/asm/thread_info.h	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29-gs3.2-bfq-qtrace/arch/x86/include/asm/thread_info.h	2009-10-05 16:02:19.832696426 +0200
@@ -94,6 +94,8 @@ struct thread_info {
 #define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
 #define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
 
+#define TIF_SYSCALL_QOSTRACE	29	/* qostrace syscall recording */
+
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -115,14 +117,18 @@ struct thread_info {
 #define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
 
+#define _TIF_SYSCALL_QOSTRACE	(1 << TIF_SYSCALL_QOSTRACE)
+
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
-	 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)
+	 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP | \
+	 _TIF_SYSCALL_QOSTRACE)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT	\
-	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)
+	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
+	 _TIF_SYSCALL_QOSTRACE)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK							\
@@ -131,7 +137,7 @@ struct thread_info {
 	   _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
 
 /* work to do on any return to user space */
-#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
+#define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP)|_TIF_SYSCALL_QOSTRACE)
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
--- linux-2.6.29-gs3.2-bfq/arch/x86/kernel/ptrace.c	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29-gs3.2-bfq-qtrace/arch/x86/kernel/ptrace.c	2009-10-05 20:19:13.298258905 +0200
@@ -21,6 +21,14 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
+#include <linux/bitops.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -60,6 +68,208 @@ enum x86_regset {
 				  X86_EFLAGS_DF | X86_EFLAGS_OF |	\
 				  X86_EFLAGS_RF | X86_EFLAGS_AC))
 
+#define QOSTRACE_LEAVE		0x10000
+#define QOSTRACE_ENTER		0x20000
+
+#ifdef CONFIG_QOSTRACE
+/* One recorded syscall entry or exit. */
+struct qostrace_event {
+	int event;		/* syscall number | QOSTRACE_ENTER or QOSTRACE_LEAVE */
+	int pid;		/* tgid of the task that made the call */
+	struct timespec time;	/* timestamp from getnstimeofday() */
+};
+
+#define QOSTRACE_NR_SYSCALLS	300
+#define QOSTRACE_MAX_EVENTS	32768
+
+/* A record read from the device is four u32s: event, pid, tv_sec, tv_nsec. */
+#define QOSTRACE_RECORD_WORDS	4
+#define QOSTRACE_RECORD_SIZE	(QOSTRACE_RECORD_WORDS * sizeof(u32))
+
+/* A command written to the device is three ints: cmd, id, enabled. */
+#define QOSTRACE_CMD_WORDS	3
+
+/*
+ * qostrace_lock protects the ring indices and qostrace_events[].  The ring
+ * is empty when first == last, so one slot always stays unused.
+ */
+static DEFINE_SPINLOCK(qostrace_lock);
+static DECLARE_BITMAP(qostrace_enabled, QOSTRACE_NR_SYSCALLS);
+static int qostrace_first, qostrace_last;
+static struct qostrace_event qostrace_events[QOSTRACE_MAX_EVENTS];
+
+/*
+ * Dequeue the oldest event and copy it to @buf as one record.
+ *
+ * The event is copied out of the ring while the lock is held, so a
+ * concurrent qostrace_syscall() cannot overwrite the slot mid-copy.
+ *
+ * Returns QOSTRACE_RECORD_SIZE, 0 if the ring is empty, or -EFAULT if @buf
+ * is not writable (the dequeued event is lost in that case).
+ */
+static int qostrace_get(char __user *buf)
+{
+	struct qostrace_event event;
+	u32 rec[QOSTRACE_RECORD_WORDS];
+
+	spin_lock(&qostrace_lock);
+	if (qostrace_first == qostrace_last) {
+		spin_unlock(&qostrace_lock);
+		return 0;
+	}
+	event = qostrace_events[qostrace_first];
+	qostrace_first = (qostrace_first + 1) % QOSTRACE_MAX_EVENTS;
+	spin_unlock(&qostrace_lock);
+
+	rec[0] = event.event;
+	rec[1] = event.pid;
+	rec[2] = event.time.tv_sec;	/* truncated to 32 bits */
+	rec[3] = event.time.tv_nsec;
+
+	if (copy_to_user(buf, rec, sizeof(rec)))
+		return -EFAULT;
+
+	return sizeof(rec);
+}
+
+/*
+ * Copy as many whole records as fit in @count bytes, each at its own offset
+ * in @buf.  Returns the number of bytes copied (0 when the ring is empty),
+ * or -EFAULT if not even the first record could be copied.
+ */
+static ssize_t qostrace_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	size_t done = 0;
+	int n;
+
+	while (count - done >= QOSTRACE_RECORD_SIZE) {
+		n = qostrace_get(buf + done);
+		if (n < 0)
+			return done ? (ssize_t)done : n;
+		if (!n)
+			break;
+		done += n;
+	}
+
+	return done;
+}
+
+/*
+ * Control interface: userspace writes three ints (cmd, id, enabled).
+ *   cmd 0: enable/disable tracing of syscall number @id
+ *   cmd 1: enable/disable tracing of the task whose pid is @id
+ *   cmd 2: discard all queued events
+ *
+ * Returns the number of bytes consumed, -EINVAL for a short write or a bad
+ * cmd, syscall number or pid, or -EFAULT if @buf is not readable.
+ */
+static ssize_t qostrace_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	int req[QOSTRACE_CMD_WORDS];
+	int id, enabled;
+	struct task_struct *p;
+	ssize_t ret = sizeof(req);
+
+	if (count < sizeof(req))
+		return -EINVAL;
+	if (copy_from_user(req, buf, sizeof(req)))
+		return -EFAULT;
+	id = req[1];
+	enabled = req[2];
+
+	switch (req[0]) {
+	case 0:
+		/* Valid bits are 0 .. QOSTRACE_NR_SYSCALLS - 1. */
+		if (id < 0 || id >= QOSTRACE_NR_SYSCALLS)
+			ret = -EINVAL;
+		else if (enabled)
+			set_bit(id, qostrace_enabled);
+		else
+			clear_bit(id, qostrace_enabled);
+		break;
+	case 1:
+		/* The pid lookup and the use of @p must stay under RCU. */
+		rcu_read_lock();
+		p = find_task_by_vpid(id);
+		if (!p)
+			ret = -EINVAL;
+		else if (enabled)
+			set_tsk_thread_flag(p, TIF_SYSCALL_QOSTRACE);
+		else
+			clear_tsk_thread_flag(p, TIF_SYSCALL_QOSTRACE);
+		rcu_read_unlock();
+		break;
+	case 2:
+		spin_lock(&qostrace_lock);
+		qostrace_first = qostrace_last = 0;
+		spin_unlock(&qostrace_lock);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static const struct file_operations qostrace_dev_fops = {
+	.owner	= THIS_MODULE,
+	.llseek	= no_llseek,
+	.read	= qostrace_read,
+	.write	= qostrace_write,
+};
+
+static struct miscdevice qostrace_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "qostrace",
+	.fops	= &qostrace_dev_fops,
+};
+
+static inline void qostrace_clock(struct timespec *time)
+{
+	getnstimeofday(time);
+}
+
+/*
+ * Record the syscall in @regs, tagged with @evmask (QOSTRACE_ENTER or
+ * QOSTRACE_LEAVE), if tracing is enabled for that syscall number.
+ * The event is dropped when the ring is full.
+ */
+static inline void qostrace_syscall(struct pt_regs *regs, int evmask)
+{
+	struct qostrace_event *event;
+	int id = regs->orig_ax;
+
+	/* Valid bits are 0 .. QOSTRACE_NR_SYSCALLS - 1. */
+	if (id < 0 || id >= QOSTRACE_NR_SYSCALLS ||
+	    !test_bit(id, qostrace_enabled))
+		return;
+
+	spin_lock(&qostrace_lock);
+	if ((qostrace_last + 1) % QOSTRACE_MAX_EVENTS != qostrace_first) {
+		event = &qostrace_events[qostrace_last];
+		qostrace_last = (qostrace_last + 1) % QOSTRACE_MAX_EVENTS;
+
+		event->event = id | evmask;
+		/* XXX could be done better, see how it works */
+		event->pid = current->tgid;
+		qostrace_clock(&event->time);
+	}
+	spin_unlock(&qostrace_lock);
+}
+
+static int __init qostrace_init(void)
+{
+	return misc_register(&qostrace_miscdev);
+}
+late_initcall(qostrace_init);
+#else
+static inline void qostrace_syscall(struct pt_regs *regs, int evmask)
+{
+}
+#endif
+
 /*
  * Determines whether a value may be installed in a segment register.
  */
@@ -1421,6 +1631,9 @@ asmregparm long syscall_trace_enter(stru
 	    tracehook_report_syscall_entry(regs))
 		ret = -1L;
 
+	if (unlikely(test_thread_flag(TIF_SYSCALL_QOSTRACE)))
+		qostrace_syscall(regs, QOSTRACE_ENTER);
+
 	if (unlikely(current->audit_context)) {
 		if (IS_IA32)
 			audit_syscall_entry(AUDIT_ARCH_I386,
@@ -1447,6 +1660,9 @@ asmregparm void syscall_trace_leave(stru
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, 0);
 
+	if (unlikely(test_thread_flag(TIF_SYSCALL_QOSTRACE)))
+		qostrace_syscall(regs, QOSTRACE_LEAVE);
+
 	/*
 	 * If TIF_SYSCALL_EMU is set, we only get here because of
 	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).