85f2b08268
Add a 'trigger' file for each trace event, enabling 'trace event triggers' to be set for trace events. 'trace event triggers' are patterned after the existing 'ftrace function triggers' implementation except that triggers are written to per-event 'trigger' files instead of to a single file such as the 'set_ftrace_filter' used for ftrace function triggers. The implementation is meant to be entirely separate from ftrace function triggers, in order to keep the respective implementations relatively simple and to allow them to diverge. The event trigger functionality is built on top of SOFT_DISABLE functionality. It adds a TRIGGER_MODE bit to the ftrace_event_file flags which is checked when any trace event fires. Triggers set for a particular event need to be checked regardless of whether that event is actually enabled or not - getting an event to fire even if it's not enabled is what's already implemented by SOFT_DISABLE mode, so trigger mode directly reuses that. Event triggers essentially inherit the soft disable logic in __ftrace_event_enable_disable() while adding a bit of logic and trigger reference counting via tm_ref on top of that in a new trace_event_trigger_enable_disable() function. Because the base __ftrace_event_enable_disable() code now needs to be invoked from outside trace_events.c, a wrapper is also added for those usages. The triggers for an event are actually invoked via a new function, event_triggers_call(), and code is also added to invoke them for ftrace_raw_event calls as well as syscall events. The main part of the patch creates a new trace_events_trigger.c file to contain the trace event triggers implementation. The standard open, read, and release file operations are implemented here. The open() implementation sets up for the various open modes of the 'trigger' file. It creates and attaches the trigger iterator and sets up the command parser. If opened for reading, it sets up the trigger seq_ops. 
The read() implementation parses the event trigger written to the 'trigger' file, looks up the trigger command, and passes it along to that event_command's func() implementation for command-specific processing. The release() implementation does whatever cleanup is needed to release the 'trigger' file, like releasing the parser and trigger iterator, etc. A couple of functions for event command registration and unregistration are added, along with a list to add them to and a mutex to protect them, as well as an (initially empty) registration function to add the set of commands that will be added by future commits, and call to it from the trace event initialization code. Also added are a couple of trigger-specific data structures needed for these implementations such as a trigger iterator and a struct for trigger-specific data. A couple of structs consisting mostly of functions meant to be implemented in command-specific ways, event_command and event_trigger_ops, are used by the generic event trigger command implementations. They're being put into trace.h alongside the other trace_event data structures and functions, in the expectation that they'll be needed in several trace_event-related files such as trace_events_trigger.c and trace_events.c. The event_command.func() function is meant to be called by the trigger parsing code in order to add a trigger instance to the corresponding event. It essentially coordinates adding a live trigger instance to the event, and arming the triggering event. 
Every event_command func() implementation essentially does the same thing for any command: - choose ops - use the value of param to choose either a number or count version of event_trigger_ops specific to the command - do the register or unregister of those ops - associate a filter, if specified, with the triggering event The reg() and unreg() ops allow command-specific implementations for event_trigger_op registration and unregistration, and the get_trigger_ops() op allows command-specific event_trigger_ops selection to be parameterized. When a trigger instance is added, the reg() op essentially adds that trigger to the triggering event and arms it, while unreg() does the opposite. The set_filter() function is used to associate a filter with the trigger - if the command doesn't specify a set_filter() implementation, the command will ignore filters. Each command has an associated trigger_type, which serves double duty, both as a unique identifier for the command as well as a value that can be used for setting a trigger mode bit during trigger invocation. The signature of func() adds a pointer to the event_command struct, used to invoke those functions, along with a command_data param that can be passed to the reg/unreg functions. This allows func() implementations to use command-specific blobs and supports code re-use. The event_trigger_ops.func() command corresponds to the trigger 'probe' function that gets called when the triggering event is actually invoked. The other functions are used to list the trigger when needed, along with a couple mundane book-keeping functions. This also moves event_file_data() into trace.h so it can be used outside of trace_events.c. Link: http://lkml.kernel.org/r/316d95061accdee070aac8e5750afba0192fa5b9.1382622043.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com> Idea-by: Steve Rostedt <rostedt@goodmis.org> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
769 lines
19 KiB
C
769 lines
19 KiB
C
#include <trace/syscall.h>
|
|
#include <trace/events/syscalls.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
|
|
#include <linux/ftrace.h>
|
|
#include <linux/perf_event.h>
|
|
#include <asm/syscall.h>
|
|
|
|
#include "trace_output.h"
|
|
#include "trace.h"
|
|
|
|
/*
 * Held around tracepoint (un)registration and the accompanying reference
 * count updates in the reg/unreg and perf enable/disable helpers below.
 */
static DEFINE_MUTEX(syscall_trace_lock);

/*
 * Forward declarations: both callbacks are defined further down in this
 * file but are referenced earlier by the syscall event classes.
 */
static int syscall_enter_register(struct ftrace_event_call *event,
				  enum trace_reg type, void *data);
static int syscall_exit_register(struct ftrace_event_call *event,
				 enum trace_reg type, void *data);
|
|
|
|
static struct list_head *
|
|
syscall_get_enter_fields(struct ftrace_event_call *call)
|
|
{
|
|
struct syscall_metadata *entry = call->data;
|
|
|
|
return &entry->enter_fields;
|
|
}
|
|
|
|
/*
 * Start and end of the array of syscall metadata pointers that
 * find_syscall_meta() walks; both symbols are provided elsewhere.
 */
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

/*
 * Table mapping a syscall number to its metadata. Allocated and filled in
 * by init_ftrace_syscalls(); NULL until then.
 */
static struct syscall_metadata **syscalls_metadata;
|
|
|
|
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
|
|
/*
 * Default matcher between a symbol name and a syscall metadata name.
 *
 * @sym:  NUL-terminated symbol name (e.g. as resolved by kallsyms)
 * @name: NUL-terminated syscall name to compare against
 *
 * Returns true when both names are equal once their first three
 * characters are skipped, false otherwise. A string shorter than three
 * characters can never match: skipping three characters would step past
 * its NUL terminator, so that case is rejected before comparing.
 */
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	/*
	 * Only compare after the "sys" prefix. Archs that use
	 * syscall wrappers may have syscalls symbols aliases prefixed
	 * with ".SyS" or ".sys" instead of "sys", leading to an unwanted
	 * mismatch.
	 */
	if (strlen(sym) < 3 || strlen(name) < 3)
		return false;

	return !strcmp(sym + 3, name + 3);
}
|
|
#endif
|
|
|
|
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
|
|
/*
|
|
* Some architectures that allow for 32bit applications
|
|
* to run on a 64bit kernel, do not map the syscalls for
|
|
* the 32bit tasks the same as they do for 64bit tasks.
|
|
*
|
|
* *cough*x86*cough*
|
|
*
|
|
* In such a case, instead of reporting the wrong syscalls,
|
|
* simply ignore them.
|
|
*
|
|
* For an arch to ignore the compat syscalls it needs to
|
|
* define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
|
|
* define the function arch_trace_is_compat_syscall() to let
|
|
* the tracing system know that it should ignore it.
|
|
*/
|
|
/*
 * Syscall number lookup used when compat syscalls are to be ignored.
 *
 * Returns -1 when arch_trace_is_compat_syscall() flags @regs as a compat
 * syscall (every caller in this file bails out on a negative number),
 * otherwise whatever syscall_get_nr() reports for @task.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	return unlikely(arch_trace_is_compat_syscall(regs)) ?
		-1 : syscall_get_nr(task, regs);
}
|
|
#else
|
|
/*
 * Syscall number lookup when compat syscalls need no special handling:
 * simply forwards to syscall_get_nr().
 */
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	int nr = syscall_get_nr(task, regs);

	return nr;
}
|
|
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
|
|
|
|
static __init struct syscall_metadata *
|
|
find_syscall_meta(unsigned long syscall)
|
|
{
|
|
struct syscall_metadata **start;
|
|
struct syscall_metadata **stop;
|
|
char str[KSYM_SYMBOL_LEN];
|
|
|
|
|
|
start = __start_syscalls_metadata;
|
|
stop = __stop_syscalls_metadata;
|
|
kallsyms_lookup(syscall, NULL, NULL, NULL, str);
|
|
|
|
if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
|
|
return NULL;
|
|
|
|
for ( ; start < stop; start++) {
|
|
if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
|
|
return *start;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static struct syscall_metadata *syscall_nr_to_meta(int nr)
|
|
{
|
|
if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
|
|
return NULL;
|
|
|
|
return syscalls_metadata[nr];
|
|
}
|
|
|
|
static enum print_line_t
|
|
print_syscall_enter(struct trace_iterator *iter, int flags,
|
|
struct trace_event *event)
|
|
{
|
|
struct trace_seq *s = &iter->seq;
|
|
struct trace_entry *ent = iter->ent;
|
|
struct syscall_trace_enter *trace;
|
|
struct syscall_metadata *entry;
|
|
int i, ret, syscall;
|
|
|
|
trace = (typeof(trace))ent;
|
|
syscall = trace->nr;
|
|
entry = syscall_nr_to_meta(syscall);
|
|
|
|
if (!entry)
|
|
goto end;
|
|
|
|
if (entry->enter_event->event.type != ent->type) {
|
|
WARN_ON_ONCE(1);
|
|
goto end;
|
|
}
|
|
|
|
ret = trace_seq_printf(s, "%s(", entry->name);
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
/* parameter types */
|
|
if (trace_flags & TRACE_ITER_VERBOSE) {
|
|
ret = trace_seq_printf(s, "%s ", entry->types[i]);
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
}
|
|
/* parameter values */
|
|
ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
|
|
trace->args[i],
|
|
i == entry->nb_args - 1 ? "" : ", ");
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
}
|
|
|
|
ret = trace_seq_putc(s, ')');
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
end:
|
|
ret = trace_seq_putc(s, '\n');
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
return TRACE_TYPE_HANDLED;
|
|
}
|
|
|
|
static enum print_line_t
|
|
print_syscall_exit(struct trace_iterator *iter, int flags,
|
|
struct trace_event *event)
|
|
{
|
|
struct trace_seq *s = &iter->seq;
|
|
struct trace_entry *ent = iter->ent;
|
|
struct syscall_trace_exit *trace;
|
|
int syscall;
|
|
struct syscall_metadata *entry;
|
|
int ret;
|
|
|
|
trace = (typeof(trace))ent;
|
|
syscall = trace->nr;
|
|
entry = syscall_nr_to_meta(syscall);
|
|
|
|
if (!entry) {
|
|
trace_seq_putc(s, '\n');
|
|
return TRACE_TYPE_HANDLED;
|
|
}
|
|
|
|
if (entry->exit_event->event.type != ent->type) {
|
|
WARN_ON_ONCE(1);
|
|
return TRACE_TYPE_UNHANDLED;
|
|
}
|
|
|
|
ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
|
|
trace->ret);
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
return TRACE_TYPE_HANDLED;
|
|
}
|
|
|
|
/*
 * Only declared here. NOTE(review): presumably left undefined on purpose so
 * that a field size mismatch surfaces as a build failure - confirm.
 */
extern char *__bad_type_size(void);

/*
 * Expands to the "type string, name string, offset, size, is_signed"
 * argument run for member @name of a local variable that must be called
 * `trace`. When sizeof(@type) differs from the size of that member, the
 * type-string argument becomes a call to __bad_type_size() instead.
 */
#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)
|
|
|
|
static int __init
|
|
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
|
|
{
|
|
int i;
|
|
int pos = 0;
|
|
|
|
/* When len=0, we just calculate the needed length */
|
|
#define LEN_OR_ZERO (len ? len - pos : 0)
|
|
|
|
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
|
|
entry->args[i], sizeof(unsigned long),
|
|
i == entry->nb_args - 1 ? "" : ", ");
|
|
}
|
|
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
|
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
pos += snprintf(buf + pos, LEN_OR_ZERO,
|
|
", ((unsigned long)(REC->%s))", entry->args[i]);
|
|
}
|
|
|
|
#undef LEN_OR_ZERO
|
|
|
|
/* return the length of print_fmt */
|
|
return pos;
|
|
}
|
|
|
|
/*
 * Install call->print_fmt for a syscall event.
 *
 * A call that is not the enter event of its syscall gets a constant
 * format printing the return value. The enter event gets a kmalloc()ed
 * string built by __set_enter_print_fmt().
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int __init set_syscall_print_fmt(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;
	char *fmt;
	int needed;

	if (meta->enter_event != call) {
		call->print_fmt = "\"0x%lx\", REC->ret";
		return 0;
	}

	/* Dry run with a zero length to learn how long the string will be. */
	needed = __set_enter_print_fmt(meta, NULL, 0);

	fmt = kmalloc(needed + 1, GFP_KERNEL);
	if (!fmt)
		return -ENOMEM;

	/* Real run: render into the buffer that was just allocated. */
	__set_enter_print_fmt(meta, fmt, needed + 1);
	call->print_fmt = fmt;

	return 0;
}
|
|
|
|
/*
 * Release the print_fmt set up by set_syscall_print_fmt(). Only the enter
 * event owns an allocated string; for any other call nothing is freed.
 */
static void __init free_syscall_print_fmt(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;

	if (meta->enter_event != call)
		return;

	kfree(call->print_fmt);
}
|
|
|
|
/*
 * define_fields callback of the syscall-enter event class.
 *
 * Registers the "nr" field followed by one unsigned-long sized field per
 * syscall argument, laid out back to back starting at the offset of
 * args[] in struct syscall_trace_enter.
 *
 * Returns 0 on success or the first error reported by
 * trace_define_field(). Registration stops at the first failing field so
 * that an error is not overwritten by a later successful call.
 */
static int __init syscall_enter_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		if (ret)
			break;
		offset += sizeof(unsigned long);
	}

	return ret;
}
|
|
|
|
/*
 * define_fields callback of the syscall-exit event class: registers the
 * "nr" field and then the "ret" field.
 *
 * Returns 0 on success or the first error from trace_define_field().
 */
static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
{
	/* SYSCALL_FIELD() requires a local variable named "trace". */
	struct syscall_trace_exit trace;
	int err;

	err = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (!err)
		err = trace_define_field(call, SYSCALL_FIELD(long, ret),
					 FILTER_OTHER);

	return err;
}
|
|
|
|
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
|
|
{
|
|
struct trace_array *tr = data;
|
|
struct ftrace_event_file *ftrace_file;
|
|
struct syscall_trace_enter *entry;
|
|
struct syscall_metadata *sys_data;
|
|
struct ring_buffer_event *event;
|
|
struct ring_buffer *buffer;
|
|
unsigned long irq_flags;
|
|
int pc;
|
|
int syscall_nr;
|
|
int size;
|
|
|
|
syscall_nr = trace_get_syscall_nr(current, regs);
|
|
if (syscall_nr < 0)
|
|
return;
|
|
|
|
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
|
|
ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
|
|
if (!ftrace_file)
|
|
return;
|
|
|
|
if (test_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &ftrace_file->flags))
|
|
event_triggers_call(ftrace_file);
|
|
if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
|
|
return;
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
if (!sys_data)
|
|
return;
|
|
|
|
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
|
|
|
|
local_save_flags(irq_flags);
|
|
pc = preempt_count();
|
|
|
|
buffer = tr->trace_buffer.buffer;
|
|
event = trace_buffer_lock_reserve(buffer,
|
|
sys_data->enter_event->event.type, size, irq_flags, pc);
|
|
if (!event)
|
|
return;
|
|
|
|
entry = ring_buffer_event_data(event);
|
|
entry->nr = syscall_nr;
|
|
syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
|
|
|
|
if (!filter_check_discard(ftrace_file, entry, buffer, event))
|
|
trace_current_buffer_unlock_commit(buffer, event,
|
|
irq_flags, pc);
|
|
}
|
|
|
|
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
|
|
{
|
|
struct trace_array *tr = data;
|
|
struct ftrace_event_file *ftrace_file;
|
|
struct syscall_trace_exit *entry;
|
|
struct syscall_metadata *sys_data;
|
|
struct ring_buffer_event *event;
|
|
struct ring_buffer *buffer;
|
|
unsigned long irq_flags;
|
|
int pc;
|
|
int syscall_nr;
|
|
|
|
syscall_nr = trace_get_syscall_nr(current, regs);
|
|
if (syscall_nr < 0)
|
|
return;
|
|
|
|
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
|
|
ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
|
|
if (!ftrace_file)
|
|
return;
|
|
|
|
if (test_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &ftrace_file->flags))
|
|
event_triggers_call(ftrace_file);
|
|
if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
|
|
return;
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
if (!sys_data)
|
|
return;
|
|
|
|
local_save_flags(irq_flags);
|
|
pc = preempt_count();
|
|
|
|
buffer = tr->trace_buffer.buffer;
|
|
event = trace_buffer_lock_reserve(buffer,
|
|
sys_data->exit_event->event.type, sizeof(*entry),
|
|
irq_flags, pc);
|
|
if (!event)
|
|
return;
|
|
|
|
entry = ring_buffer_event_data(event);
|
|
entry->nr = syscall_nr;
|
|
entry->ret = syscall_get_return_value(current, regs);
|
|
|
|
if (!filter_check_discard(ftrace_file, entry, buffer, event))
|
|
trace_current_buffer_unlock_commit(buffer, event,
|
|
irq_flags, pc);
|
|
}
|
|
|
|
static int reg_event_syscall_enter(struct ftrace_event_file *file,
|
|
struct ftrace_event_call *call)
|
|
{
|
|
struct trace_array *tr = file->tr;
|
|
int ret = 0;
|
|
int num;
|
|
|
|
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
|
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
|
|
return -ENOSYS;
|
|
mutex_lock(&syscall_trace_lock);
|
|
if (!tr->sys_refcount_enter)
|
|
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
|
|
if (!ret) {
|
|
rcu_assign_pointer(tr->enter_syscall_files[num], file);
|
|
tr->sys_refcount_enter++;
|
|
}
|
|
mutex_unlock(&syscall_trace_lock);
|
|
return ret;
|
|
}
|
|
|
|
static void unreg_event_syscall_enter(struct ftrace_event_file *file,
|
|
struct ftrace_event_call *call)
|
|
{
|
|
struct trace_array *tr = file->tr;
|
|
int num;
|
|
|
|
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
|
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
|
|
return;
|
|
mutex_lock(&syscall_trace_lock);
|
|
tr->sys_refcount_enter--;
|
|
rcu_assign_pointer(tr->enter_syscall_files[num], NULL);
|
|
if (!tr->sys_refcount_enter)
|
|
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
|
|
mutex_unlock(&syscall_trace_lock);
|
|
}
|
|
|
|
static int reg_event_syscall_exit(struct ftrace_event_file *file,
|
|
struct ftrace_event_call *call)
|
|
{
|
|
struct trace_array *tr = file->tr;
|
|
int ret = 0;
|
|
int num;
|
|
|
|
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
|
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
|
|
return -ENOSYS;
|
|
mutex_lock(&syscall_trace_lock);
|
|
if (!tr->sys_refcount_exit)
|
|
ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
|
|
if (!ret) {
|
|
rcu_assign_pointer(tr->exit_syscall_files[num], file);
|
|
tr->sys_refcount_exit++;
|
|
}
|
|
mutex_unlock(&syscall_trace_lock);
|
|
return ret;
|
|
}
|
|
|
|
static void unreg_event_syscall_exit(struct ftrace_event_file *file,
|
|
struct ftrace_event_call *call)
|
|
{
|
|
struct trace_array *tr = file->tr;
|
|
int num;
|
|
|
|
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
|
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
|
|
return;
|
|
mutex_lock(&syscall_trace_lock);
|
|
tr->sys_refcount_exit--;
|
|
rcu_assign_pointer(tr->exit_syscall_files[num], NULL);
|
|
if (!tr->sys_refcount_exit)
|
|
unregister_trace_sys_exit(ftrace_syscall_exit, tr);
|
|
mutex_unlock(&syscall_trace_lock);
|
|
}
|
|
|
|
/*
 * raw_init callback shared by both syscall event classes.
 *
 * Rejects events whose metadata carries a syscall number outside
 * [0, NR_syscalls), sets up print_fmt and then hands the call to
 * trace_event_raw_init(). If that fails, the print_fmt is released again.
 *
 * Returns the value of trace_event_raw_init(), -ENOSYS for an out-of-range
 * syscall number, or -ENOMEM if print_fmt could not be set up.
 */
static int __init init_syscall_trace(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;
	int nr = meta->syscall_nr;
	int id;

	if (nr < 0 || nr >= NR_syscalls) {
		pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
			 meta->name);
		return -ENOSYS;
	}

	if (set_syscall_print_fmt(call) < 0)
		return -ENOMEM;

	id = trace_event_raw_init(call);
	if (id < 0)
		free_syscall_print_fmt(call);

	return id;
}
|
|
|
|
struct trace_event_functions enter_syscall_print_funcs = {
|
|
.trace = print_syscall_enter,
|
|
};
|
|
|
|
struct trace_event_functions exit_syscall_print_funcs = {
|
|
.trace = print_syscall_exit,
|
|
};
|
|
|
|
struct ftrace_event_class __refdata event_class_syscall_enter = {
|
|
.system = "syscalls",
|
|
.reg = syscall_enter_register,
|
|
.define_fields = syscall_enter_define_fields,
|
|
.get_fields = syscall_get_enter_fields,
|
|
.raw_init = init_syscall_trace,
|
|
};
|
|
|
|
struct ftrace_event_class __refdata event_class_syscall_exit = {
|
|
.system = "syscalls",
|
|
.reg = syscall_exit_register,
|
|
.define_fields = syscall_exit_define_fields,
|
|
.fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
|
|
.raw_init = init_syscall_trace,
|
|
};
|
|
|
|
/*
 * Weak default: the address of syscall @nr is its sys_call_table entry.
 * @nr is used as an index without any range check.
 */
unsigned long __init __weak arch_syscall_addr(int nr)
{
	const unsigned long addr = (unsigned long)sys_call_table[nr];

	return addr;
}
|
|
|
|
/*
 * Early initcall that builds the syscall number -> metadata table.
 *
 * For every number below NR_syscalls the handler address is resolved with
 * arch_syscall_addr() and matched to a metadata entry; entries that are
 * found get their syscall_nr set and are stored in the table. Numbers
 * without metadata keep a NULL slot.
 *
 * Returns 0 on success or -ENOMEM if the table cannot be allocated.
 */
static int __init init_ftrace_syscalls(void)
{
	int nr;

	syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
				    GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (nr = 0; nr < NR_syscalls; nr++) {
		struct syscall_metadata *meta;

		meta = find_syscall_meta(arch_syscall_addr(nr));
		if (meta) {
			meta->syscall_nr = nr;
			syscalls_metadata[nr] = meta;
		}
	}

	return 0;
}
early_initcall(init_ftrace_syscalls);
|
|
|
|
#ifdef CONFIG_PERF_EVENTS
|
|
|
|
/* One bit per syscall number: set while perf has that enter/exit event enabled. */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);

/* Number of perf-enabled enter/exit events; the probes stay registered while non-zero. */
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
|
|
|
|
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
|
{
|
|
struct syscall_metadata *sys_data;
|
|
struct syscall_trace_enter *rec;
|
|
struct hlist_head *head;
|
|
int syscall_nr;
|
|
int rctx;
|
|
int size;
|
|
|
|
syscall_nr = trace_get_syscall_nr(current, regs);
|
|
if (syscall_nr < 0)
|
|
return;
|
|
if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
|
|
return;
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
if (!sys_data)
|
|
return;
|
|
|
|
head = this_cpu_ptr(sys_data->enter_event->perf_events);
|
|
if (hlist_empty(head))
|
|
return;
|
|
|
|
/* get the size after alignment with the u32 buffer size field */
|
|
size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
|
|
size = ALIGN(size + sizeof(u32), sizeof(u64));
|
|
size -= sizeof(u32);
|
|
|
|
rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
|
|
sys_data->enter_event->event.type, regs, &rctx);
|
|
if (!rec)
|
|
return;
|
|
|
|
rec->nr = syscall_nr;
|
|
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
|
|
(unsigned long *)&rec->args);
|
|
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
|
|
}
|
|
|
|
static int perf_sysenter_enable(struct ftrace_event_call *call)
|
|
{
|
|
int ret = 0;
|
|
int num;
|
|
|
|
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
if (!sys_perf_refcount_enter)
|
|
ret = register_trace_sys_enter(perf_syscall_enter, NULL);
|
|
if (ret) {
|
|
pr_info("event trace: Could not activate"
|
|
"syscall entry trace point");
|
|
} else {
|
|
set_bit(num, enabled_perf_enter_syscalls);
|
|
sys_perf_refcount_enter++;
|
|
}
|
|
mutex_unlock(&syscall_trace_lock);
|
|
return ret;
|
|
}
|
|
|
|
static void perf_sysenter_disable(struct ftrace_event_call *call)
|
|
{
|
|
int num;
|
|
|
|
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
sys_perf_refcount_enter--;
|
|
clear_bit(num, enabled_perf_enter_syscalls);
|
|
if (!sys_perf_refcount_enter)
|
|
unregister_trace_sys_enter(perf_syscall_enter, NULL);
|
|
mutex_unlock(&syscall_trace_lock);
|
|
}
|
|
|
|
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
|
{
|
|
struct syscall_metadata *sys_data;
|
|
struct syscall_trace_exit *rec;
|
|
struct hlist_head *head;
|
|
int syscall_nr;
|
|
int rctx;
|
|
int size;
|
|
|
|
syscall_nr = trace_get_syscall_nr(current, regs);
|
|
if (syscall_nr < 0)
|
|
return;
|
|
if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
|
|
return;
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
if (!sys_data)
|
|
return;
|
|
|
|
head = this_cpu_ptr(sys_data->exit_event->perf_events);
|
|
if (hlist_empty(head))
|
|
return;
|
|
|
|
/* We can probably do that at build time */
|
|
size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
|
|
size -= sizeof(u32);
|
|
|
|
rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
|
|
sys_data->exit_event->event.type, regs, &rctx);
|
|
if (!rec)
|
|
return;
|
|
|
|
rec->nr = syscall_nr;
|
|
rec->ret = syscall_get_return_value(current, regs);
|
|
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
|
|
}
|
|
|
|
static int perf_sysexit_enable(struct ftrace_event_call *call)
|
|
{
|
|
int ret = 0;
|
|
int num;
|
|
|
|
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
if (!sys_perf_refcount_exit)
|
|
ret = register_trace_sys_exit(perf_syscall_exit, NULL);
|
|
if (ret) {
|
|
pr_info("event trace: Could not activate"
|
|
"syscall exit trace point");
|
|
} else {
|
|
set_bit(num, enabled_perf_exit_syscalls);
|
|
sys_perf_refcount_exit++;
|
|
}
|
|
mutex_unlock(&syscall_trace_lock);
|
|
return ret;
|
|
}
|
|
|
|
static void perf_sysexit_disable(struct ftrace_event_call *call)
|
|
{
|
|
int num;
|
|
|
|
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
sys_perf_refcount_exit--;
|
|
clear_bit(num, enabled_perf_exit_syscalls);
|
|
if (!sys_perf_refcount_exit)
|
|
unregister_trace_sys_exit(perf_syscall_exit, NULL);
|
|
mutex_unlock(&syscall_trace_lock);
|
|
}
|
|
|
|
#endif /* CONFIG_PERF_EVENTS */
|
|
|
|
static int syscall_enter_register(struct ftrace_event_call *event,
|
|
enum trace_reg type, void *data)
|
|
{
|
|
struct ftrace_event_file *file = data;
|
|
|
|
switch (type) {
|
|
case TRACE_REG_REGISTER:
|
|
return reg_event_syscall_enter(file, event);
|
|
case TRACE_REG_UNREGISTER:
|
|
unreg_event_syscall_enter(file, event);
|
|
return 0;
|
|
|
|
#ifdef CONFIG_PERF_EVENTS
|
|
case TRACE_REG_PERF_REGISTER:
|
|
return perf_sysenter_enable(event);
|
|
case TRACE_REG_PERF_UNREGISTER:
|
|
perf_sysenter_disable(event);
|
|
return 0;
|
|
case TRACE_REG_PERF_OPEN:
|
|
case TRACE_REG_PERF_CLOSE:
|
|
case TRACE_REG_PERF_ADD:
|
|
case TRACE_REG_PERF_DEL:
|
|
return 0;
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int syscall_exit_register(struct ftrace_event_call *event,
|
|
enum trace_reg type, void *data)
|
|
{
|
|
struct ftrace_event_file *file = data;
|
|
|
|
switch (type) {
|
|
case TRACE_REG_REGISTER:
|
|
return reg_event_syscall_exit(file, event);
|
|
case TRACE_REG_UNREGISTER:
|
|
unreg_event_syscall_exit(file, event);
|
|
return 0;
|
|
|
|
#ifdef CONFIG_PERF_EVENTS
|
|
case TRACE_REG_PERF_REGISTER:
|
|
return perf_sysexit_enable(event);
|
|
case TRACE_REG_PERF_UNREGISTER:
|
|
perf_sysexit_disable(event);
|
|
return 0;
|
|
case TRACE_REG_PERF_OPEN:
|
|
case TRACE_REG_PERF_CLOSE:
|
|
case TRACE_REG_PERF_ADD:
|
|
case TRACE_REG_PERF_DEL:
|
|
return 0;
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|