diff --git a/Makefile b/Makefile index ad82601..f5cdb72 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29..ed2b862 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -5,6 +5,12 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" +config QTRACE + bool "Low overhead profiling of system calls" + default y + ---help--- + Use this only if you know what it means. + config STRICT_DEVMEM bool "Filter access to /dev/mem" ---help--- diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d27d0a2..540d116 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -95,7 +95,8 @@ struct thread_info { #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ -#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ +#define TIF_SYSCALL_TRACEPOINT 28 /* for ftrace syscall instrumentation */ +#define TIF_QTRACE 29 /* qtrace records this task's syscalls and wakeups */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -119,16 +120,18 @@ struct thread_info { #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_QTRACE (1 << TIF_QTRACE) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_QTRACE) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ 
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_QTRACE) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -137,8 +140,8 @@ struct thread_info { _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) /* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) +#define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP) | \ + _TIF_SYSCALL_TRACEPOINT | _TIF_QTRACE) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2..ce24af1 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -1496,6 +1500,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); + if (unlikely(test_thread_flag(TIF_QTRACE))) + qtrace_syscall(regs, QTRACE_ENTER); + if (unlikely(current->audit_context)) { if (IS_IA32) audit_syscall_entry(AUDIT_ARCH_I386, @@ -1525,6 +1532,9 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); + if (unlikely(test_thread_flag(TIF_QTRACE))) + qtrace_syscall(regs, QTRACE_LEAVE); + /* * If TIF_SYSCALL_EMU is set, we only get here because of * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). 
diff --git a/include/linux/qtrace.h b/include/linux/qtrace.h
new file mode 100644
index 0000000..30ee4ac
--- /dev/null
+++ b/include/linux/qtrace.h
@@ -0,0 +1,24 @@
+#ifndef _QTRACE_H
+#define _QTRACE_H
+
+/* Event-type flags; OR-ed with the syscall number by qtrace_syscall(). */
+#define QTRACE_LEAVE	0x10000
+#define QTRACE_ENTER	0x20000
+
+struct pt_regs;
+struct task_struct;
+
+#ifdef CONFIG_QTRACE
+void qtrace_syscall(struct pt_regs *regs, int evmask);
+void qtrace_wakeup(struct task_struct *p, int evmask);
+#else
+/* No-op stubs so callers build when CONFIG_QTRACE is disabled. */
+static inline void qtrace_syscall(struct pt_regs *regs, int evmask)
+{
+}
+
+static inline void qtrace_wakeup(struct task_struct *p, int evmask)
+{
+}
+#endif
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index d7c13d2..c800bc6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
+obj-$(CONFIG_QTRACE) += qtrace.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
diff --git a/kernel/qtrace.c b/kernel/qtrace.c
new file mode 100644
index 0000000..f16f14f
--- /dev/null
+++ b/kernel/qtrace.c
@@ -0,0 +1,284 @@
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/bitops.h>
+#include <linux/time.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <linux/qtrace.h>
+
+#define QTRACE_NR_SYSCALLS	300
+#define QTRACE_NR_RINGS		2
+#define QTRACE_MAX_EVENTS	32768
+/* Bytes per event record handed to user space: four 32-bit words. */
+#define QTRACE_RECORD_SIZE	16
+
+/* One recorded event; qtrace_put() fills in the timestamp. */
+struct qtrace_event {
+	int event;
+	int pid;
+	struct timespec time;
+};
+
+/*
+ * Circular event buffer. 'first' is the next slot to read and 'last' the
+ * next slot to write; the ring is empty when they are equal, so it holds
+ * at most QTRACE_MAX_EVENTS - 1 events. 'lock' protects all fields.
+ */
+struct qtrace_ring {
+	spinlock_t lock;
+	int first;
+	int last;
+	struct qtrace_event events[QTRACE_MAX_EVENTS];
+};
+
+/* Bit n set: qtrace_syscall() records syscall number n. */
+static DECLARE_BITMAP(qtrace_sc_enabled, QTRACE_NR_SYSCALLS);
+/* Ring 0 receives syscall events, ring 1 receives wakeup events. */
+static struct qtrace_ring qtrace_rings[QTRACE_NR_RINGS];
+/* Serializes the commands handled by qtrace_write(). */
+static spinlock_t qtrace_lock;
+
+/*
+ * Pop the oldest event of ring 'rn' and copy it to 'buf' as four u32 words:
+ * event, pid, tv_sec, tv_nsec.
+ *
+ * Returns QTRACE_RECORD_SIZE on success, 0 if the ring is empty, or -EFAULT
+ * if 'buf' cannot be written (the popped event is lost in that case).
+ */
+static int qtrace_get(int rn, char __user *buf)
+{
+	struct qtrace_ring *ring = &qtrace_rings[rn];
+	struct qtrace_event event;
+	u32 rec[QTRACE_RECORD_SIZE / sizeof(u32)];
+
+	spin_lock_irq(&ring->lock);
+	if (ring->first == ring->last) {
+		/* Pair with spin_lock_irq(), otherwise IRQs stay disabled. */
+		spin_unlock_irq(&ring->lock);
+		return 0;
+	}
+
+	/*
+	 * Copy the slot while holding the lock: once it is dropped, another
+	 * reader may advance 'first' and let qtrace_put() reuse the slot.
+	 */
+	event = ring->events[ring->first];
+	ring->first = (ring->first + 1) % QTRACE_MAX_EVENTS;
+	spin_unlock_irq(&ring->lock);
+
+	rec[0] = event.event;
+	rec[1] = event.pid;
+	rec[2] = event.time.tv_sec;
+	rec[3] = event.time.tv_nsec;
+
+	if (copy_to_user(buf, rec, sizeof(rec)))
+		return -EFAULT;
+
+	return sizeof(rec);
+}
+
+/*
+ * Drain up to count / QTRACE_RECORD_SIZE events from one ring into 'buf'.
+ * The file position selects the ring and is left unchanged.
+ *
+ * Returns the number of bytes stored (0 if the ring was empty), -EINVAL for
+ * an invalid ring number, or -EFAULT if the first record could not be copied.
+ */
+static ssize_t qtrace_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	size_t i, nr = count / QTRACE_RECORD_SIZE;
+	loff_t rn = *ppos;
+	ssize_t ret = 0;
+	int v;
+
+	if (rn < 0 || rn >= QTRACE_NR_RINGS)
+		return -EINVAL;
+
+	for (i = 0; i < nr; i++) {
+		/* Each record goes after the ones already stored. */
+		v = qtrace_get(rn, buf + ret);
+		if (v < 0)
+			return ret ? ret : v;
+		if (!v)
+			break;
+		ret += v;
+	}
+
+	return ret;
+}
+
+/*
+ * Control interface. 'buf' holds three ints: { cmd, id, enabled }.
+ *   cmd 0: enable/disable recording of syscall number 'id'
+ *   cmd 1: set/clear TIF_QTRACE on the task find_task_by_vpid(id) returns
+ *   cmd 2: discard the events queued in every ring
+ *
+ * Returns 2 * sizeof(int) on success, -EFAULT if the request cannot be read,
+ * or -EINVAL for an unknown command, syscall number or task id.
+ */
+static ssize_t qtrace_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	ssize_t ret = 2 * sizeof(int);
+	int req[3], cmd, id, enabled, rn;
+	struct qtrace_ring *ring;
+	struct task_struct *p;
+
+	/* NOTE(review): 'count' is not checked; three ints are always read. */
+	if (copy_from_user(req, buf, sizeof(req)))
+		return -EFAULT;
+	cmd = req[0];
+	id = req[1];
+	enabled = req[2];
+
+	spin_lock(&qtrace_lock);
+	switch (cmd) {
+	case 0:
+		/* Valid bits are 0 .. QTRACE_NR_SYSCALLS - 1. */
+		if (id < 0 || id >= QTRACE_NR_SYSCALLS)
+			ret = -EINVAL;
+		else if (enabled)
+			__set_bit(id, qtrace_sc_enabled);
+		else
+			__clear_bit(id, qtrace_sc_enabled);
+		break;
+	case 1:
+		/* The pid lookup must run under rcu_read_lock(). */
+		rcu_read_lock();
+		p = find_task_by_vpid(id);
+		if (!p)
+			ret = -EINVAL;
+		else if (enabled)
+			set_tsk_thread_flag(p, TIF_QTRACE);
+		else
+			clear_tsk_thread_flag(p, TIF_QTRACE);
+		rcu_read_unlock();
+		break;
+	case 2:
+		for (rn = 0; rn < QTRACE_NR_RINGS; rn++) {
+			ring = &qtrace_rings[rn];
+			spin_lock_irq(&ring->lock);
+			ring->first = ring->last;
+			spin_unlock_irq(&ring->lock);
+		}
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock(&qtrace_lock);
+
+	return ret;
+}
+
+/* Nothing to set up per open file. */
+static int qtrace_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+/* Nothing to tear down per open file. */
+static int qtrace_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static const struct file_operations qtrace_dev_fops = {
+	.owner = THIS_MODULE,
+	.llseek = no_llseek,
+	.read = qtrace_read,
+	.write = qtrace_write,
+	.open = qtrace_open,
+	.release = qtrace_release,
+};
+
+static struct miscdevice qtrace_miscdev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "qtrace",
+	.fops = &qtrace_dev_fops,
+};
+
+/* Timestamp source for recorded events. */
+static inline void qtrace_clock(struct timespec *time)
+{
+	getnstimeofday(time);
+}
+
+/*
+ * Append 'event' (its event and pid fields) to 'ring', stamping it with the
+ * current time. The event is silently dropped when the ring is full.
+ */
+static inline void qtrace_put(struct qtrace_ring *ring,
+			      struct qtrace_event *event)
+{
+	struct qtrace_event *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ring->lock, flags);
+	if ((ring->last + 1) % QTRACE_MAX_EVENTS != ring->first) {
+		e = &ring->events[ring->last];
+		ring->last = (ring->last + 1) % QTRACE_MAX_EVENTS;
+
+		e->event = event->event;
+		e->pid = event->pid;
+		qtrace_clock(&e->time);
+	}
+	spin_unlock_irqrestore(&ring->lock, flags);
+}
+
+/*
+ * Record a syscall entry or exit of the current task in ring 0, if the
+ * syscall number in regs->orig_ax is enabled in qtrace_sc_enabled.
+ * 'evmask' (QTRACE_ENTER or QTRACE_LEAVE) is OR-ed into the event word.
+ */
+void qtrace_syscall(struct pt_regs *regs, int evmask)
+{
+	struct qtrace_ring *ring = &qtrace_rings[0];
+	struct qtrace_event event;
+	int id = regs->orig_ax;
+
+	/* Valid bits are 0 .. QTRACE_NR_SYSCALLS - 1. */
+	if (id < 0 || id >= QTRACE_NR_SYSCALLS ||
+	    !test_bit(id, qtrace_sc_enabled))
+		return;
+
+	event.event = id | evmask;
+	/* XXX could be done better, see how it works */
+	event.pid = current->tgid;
+
+	qtrace_put(ring, &event);
+}
+
+/*
+ * Record a wakeup of task 'p' in ring 1. 'evmask' is currently ignored;
+ * the event word is always 0.
+ */
+void qtrace_wakeup(struct task_struct *p, int evmask)
+{
+	struct qtrace_ring *ring = &qtrace_rings[1];
+	struct qtrace_event event;
+
+	event.event = 0;
+	event.pid = p->tgid;
+
+	qtrace_put(ring, &event);
+}
+
+/* Initialize the locks and register the "qtrace" misc device. */
+static int __init qtrace_init(void)
+{
+	struct qtrace_ring *ring;
+	int i;
+
+	spin_lock_init(&qtrace_lock);
+	for (i = 0; i < QTRACE_NR_RINGS; i++) {
+		ring = &qtrace_rings[i];
+		spin_lock_init(&ring->lock);
+	}
+	return misc_register(&qtrace_miscdev);
+}
+late_initcall(qtrace_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c11ae0..b5a05c8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include
 #include
 #include
+#include <linux/qtrace.h>
 #include
 #include
@@ -2434,6 +2435,10 @@ out_activate:
 
 out_running:
 	trace_sched_wakeup(rq, p, success);
+#ifdef CONFIG_QTRACE
+	if (unlikely(test_ti_thread_flag(task_thread_info(p), TIF_QTRACE)))
+		qtrace_wakeup(p, 0);
+#endif
 	check_preempt_curr(rq, p, wake_flags);
 
 	p->state = TASK_RUNNING;