fb0fadf9b2
This change adds support for a new ptrace option, PTRACE_O_TRACESECCOMP,
and a new return value for seccomp BPF programs, SECCOMP_RET_TRACE.
When a tracer specifies the PTRACE_O_TRACESECCOMP ptrace option, the
tracer will be notified, via PTRACE_EVENT_SECCOMP, for any syscall that
results in a BPF program returning SECCOMP_RET_TRACE. The 16-bit
SECCOMP_RET_DATA mask of the BPF program return value will be passed as
the ptrace_message and may be retrieved using PTRACE_GETEVENTMSG.
If the subordinate process is not using seccomp filter, then no
system call notifications will occur even if the option is specified.
If there is no tracer with PTRACE_O_TRACESECCOMP when SECCOMP_RET_TRACE
is returned, the system call will not be executed and an -ENOSYS errno
will be returned to userspace.
This change adds a dependency on the system call slow path. Any future
efforts to use the system call fast path for seccomp filter will need to
address this restriction.
Signed-off-by: Will Drewry <wad@chromium.org>
Acked-by: Eric Paris <eparis@redhat.com>
v18: - rebase
- comment fatal_signal check
- acked-by
- drop secure_computing_int comment
v17: - ...
v16: - update PT_TRACE_MASK to 0xbf4 so that STOP isn't cleared on SETOPTIONS call (indan@nul.nu)
[note PT_TRACE_MASK disappears in linux-next]
v15: - add audit support for non-zero return codes
- clean up style (indan@nul.nu)
v14: - rebase/nochanges
v13: - rebase on to 88ebdda615
(Brings back a change to ptrace.c and the masks.)
v12: - rebase to linux-next
- use ptrace_event and update arch/Kconfig to mention slow-path dependency
- drop all tracehook changes and inclusion (oleg@redhat.com)
v11: - invert the logic to just make it a PTRACE_SYSCALL accelerator
(indan@nul.nu)
v10: - moved to PTRACE_O_SECCOMP / PT_TRACE_SECCOMP
v9: - n/a
v8: - guarded PTRACE_SECCOMP use with an ifdef
v7: - introduced
Signed-off-by: James Morris <james.l.morris@oracle.com>
420 lines
15 KiB
C
420 lines
15 KiB
C
#ifndef _LINUX_PTRACE_H
|
|
#define _LINUX_PTRACE_H
|
|
/* ptrace.h */
|
|
/* structs and defines to help the user use the ptrace system call. */
|
|
|
|
/* has the defines to get at the registers. */
|
|
|
|
/* Request codes passed to the ptrace system call. */
#define PTRACE_TRACEME		   0
#define PTRACE_PEEKTEXT		   1
#define PTRACE_PEEKDATA		   2
#define PTRACE_PEEKUSR		   3
#define PTRACE_POKETEXT		   4
#define PTRACE_POKEDATA		   5
#define PTRACE_POKEUSR		   6
#define PTRACE_CONT		   7
#define PTRACE_KILL		   8
#define PTRACE_SINGLESTEP	   9

#define PTRACE_ATTACH		  16
#define PTRACE_DETACH		  17

#define PTRACE_SYSCALL		  24

/* 0x4200-0x4300 are reserved for architecture-independent additions. */
#define PTRACE_SETOPTIONS	0x4200
#define PTRACE_GETEVENTMSG	0x4201
#define PTRACE_GETSIGINFO	0x4202
#define PTRACE_SETSIGINFO	0x4203
|
|
|
|
/*
 * Generic ptrace interface that exports the architecture specific regsets
 * using the corresponding NT_* types (which are also used in the core dump).
 * Please note that the NT_PRSTATUS note type in a core dump contains a full
 * 'struct elf_prstatus'. But the user_regset for NT_PRSTATUS contains just the
 * elf_gregset_t that is the pr_reg field of 'struct elf_prstatus'. For all the
 * other user_regset flavors, the user_regset layout and the ELF core dump note
 * payload are exactly the same layout.
 *
 * This interface usage is as follows:
 *	struct iovec iov = { buf, len};
 *
 *	ret = ptrace(PTRACE_GETREGSET/PTRACE_SETREGSET, pid, NT_XXX_TYPE, &iov);
 *
 * On the successful completion, iov.len will be updated by the kernel,
 * specifying how much the kernel has written/read to/from the user's iov.buf.
 */
#define PTRACE_GETREGSET	0x4204
#define PTRACE_SETREGSET	0x4205

#define PTRACE_SEIZE		0x4206
#define PTRACE_INTERRUPT	0x4207
#define PTRACE_LISTEN		0x4208
|
|
|
|
/* Wait extended result codes for the above trace options. */
#define PTRACE_EVENT_FORK	1
#define PTRACE_EVENT_VFORK	2
#define PTRACE_EVENT_CLONE	3
#define PTRACE_EVENT_EXEC	4
#define PTRACE_EVENT_VFORK_DONE	5
#define PTRACE_EVENT_EXIT	6
#define PTRACE_EVENT_SECCOMP	7
/* Extended result codes which are enabled by means other than options. */
#define PTRACE_EVENT_STOP	128

/*
 * Options set using PTRACE_SETOPTIONS or using PTRACE_SEIZE @data param.
 *
 * Apart from PTRACE_O_TRACESYSGOOD (bit 0), each option is the bit whose
 * position equals the PTRACE_EVENT_* number it enables.
 */
#define PTRACE_O_TRACESYSGOOD	1
#define PTRACE_O_TRACEFORK	(1 << PTRACE_EVENT_FORK)
#define PTRACE_O_TRACEVFORK	(1 << PTRACE_EVENT_VFORK)
#define PTRACE_O_TRACECLONE	(1 << PTRACE_EVENT_CLONE)
#define PTRACE_O_TRACEEXEC	(1 << PTRACE_EVENT_EXEC)
#define PTRACE_O_TRACEVFORKDONE	(1 << PTRACE_EVENT_VFORK_DONE)
#define PTRACE_O_TRACEEXIT	(1 << PTRACE_EVENT_EXIT)
#define PTRACE_O_TRACESECCOMP	(1 << PTRACE_EVENT_SECCOMP)

/* Covers bits 0-7, i.e. every PTRACE_O_* option defined above. */
#define PTRACE_O_MASK		0x000000ff
|
|
|
|
#include <asm/ptrace.h>
|
|
|
|
#ifdef __KERNEL__
|
|
/*
 * Ptrace flags
 *
 * The ownership rules for task->ptrace, which holds the ptrace
 * flags, are simple.  When a task is running it owns its task->ptrace
 * flags.  When a task is stopped the ptracer owns task->ptrace.
 */
|
|
|
|
#define PT_SEIZED	0x00010000	/* SEIZE used, enable new behavior */
#define PT_PTRACED	0x00000001
#define PT_DTRACE	0x00000002	/* delayed trace (used on m68k, i386) */
#define PT_PTRACE_CAP	0x00000004	/* ptracer can follow suid-exec */

/*
 * The three low bits are taken by PT_PTRACED, PT_DTRACE and PT_PTRACE_CAP,
 * so the per-event enable flags start at bit PT_OPT_FLAG_SHIFT.
 */
#define PT_OPT_FLAG_SHIFT	3
/* PT_TRACE_* event enable flags */
#define PT_EVENT_FLAG(event)	(1 << (PT_OPT_FLAG_SHIFT + (event)))
#define PT_TRACESYSGOOD		PT_EVENT_FLAG(0)
#define PT_TRACE_FORK		PT_EVENT_FLAG(PTRACE_EVENT_FORK)
#define PT_TRACE_VFORK		PT_EVENT_FLAG(PTRACE_EVENT_VFORK)
#define PT_TRACE_CLONE		PT_EVENT_FLAG(PTRACE_EVENT_CLONE)
#define PT_TRACE_EXEC		PT_EVENT_FLAG(PTRACE_EVENT_EXEC)
#define PT_TRACE_VFORK_DONE	PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE)
#define PT_TRACE_EXIT		PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
#define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)
|
|
|
|
/*
 * single stepping state bits (used on ARM and PA-RISC)
 *
 * The masks are built from an unsigned 1: shifting a signed 1 into bit 31
 * overflows int, which is undefined behaviour in C.
 */
#define PT_SINGLESTEP_BIT	31
#define PT_SINGLESTEP		(1U << PT_SINGLESTEP_BIT)
#define PT_BLOCKSTEP_BIT	30
#define PT_BLOCKSTEP		(1U << PT_BLOCKSTEP_BIT)
|
|
|
|
#include <linux/compiler.h> /* For unlikely. */
|
|
#include <linux/sched.h> /* For struct task_struct. */
|
|
#include <linux/err.h> /* for IS_ERR_VALUE */
|
|
#include <linux/bug.h> /* For BUG_ON. */
|
|
|
|
|
|
extern long arch_ptrace(struct task_struct *child, long request,
|
|
unsigned long addr, unsigned long data);
|
|
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
|
|
extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
|
|
extern void ptrace_disable(struct task_struct *);
|
|
extern int ptrace_check_attach(struct task_struct *task, bool ignore_state);
|
|
extern int ptrace_request(struct task_struct *child, long request,
|
|
unsigned long addr, unsigned long data);
|
|
extern void ptrace_notify(int exit_code);
|
|
extern void __ptrace_link(struct task_struct *child,
|
|
struct task_struct *new_parent);
|
|
extern void __ptrace_unlink(struct task_struct *child);
|
|
extern void exit_ptrace(struct task_struct *tracer);
|
|
/* Bit flags for the @mode argument of the access checks below. */
#define PTRACE_MODE_READ	0x01
#define PTRACE_MODE_ATTACH	0x02
#define PTRACE_MODE_NOAUDIT	0x04
/* Returns 0 on success, -errno on denial. */
extern int __ptrace_may_access(struct task_struct *task, unsigned int mode);
/* Returns true on success, false on denial. */
extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
|
|
|
|
static inline int ptrace_reparented(struct task_struct *child)
|
|
{
|
|
return !same_thread_group(child->real_parent, child->parent);
|
|
}
|
|
|
|
static inline void ptrace_unlink(struct task_struct *child)
|
|
{
|
|
if (unlikely(child->ptrace))
|
|
__ptrace_unlink(child);
|
|
}
|
|
|
|
/* Generic peek/poke helpers; only the prototypes live in this header. */
int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);
int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);
|
|
|
|
/**
|
|
* ptrace_parent - return the task that is tracing the given task
|
|
* @task: task to consider
|
|
*
|
|
* Returns %NULL if no one is tracing @task, or the &struct task_struct
|
|
* pointer to its tracer.
|
|
*
|
|
* Must called under rcu_read_lock(). The pointer returned might be kept
|
|
* live only by RCU. During exec, this may be called with task_lock() held
|
|
* on @task, still held from when check_unsafe_exec() was called.
|
|
*/
|
|
static inline struct task_struct *ptrace_parent(struct task_struct *task)
|
|
{
|
|
if (unlikely(task->ptrace))
|
|
return rcu_dereference(task->parent);
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* ptrace_event_enabled - test whether a ptrace event is enabled
|
|
* @task: ptracee of interest
|
|
* @event: %PTRACE_EVENT_* to test
|
|
*
|
|
* Test whether @event is enabled for ptracee @task.
|
|
*
|
|
* Returns %true if @event is enabled, %false otherwise.
|
|
*/
|
|
static inline bool ptrace_event_enabled(struct task_struct *task, int event)
|
|
{
|
|
return task->ptrace & PT_EVENT_FLAG(event);
|
|
}
|
|
|
|
/**
|
|
* ptrace_event - possibly stop for a ptrace event notification
|
|
* @event: %PTRACE_EVENT_* value to report
|
|
* @message: value for %PTRACE_GETEVENTMSG to return
|
|
*
|
|
* Check whether @event is enabled and, if so, report @event and @message
|
|
* to the ptrace parent.
|
|
*
|
|
* Called without locks.
|
|
*/
|
|
static inline void ptrace_event(int event, unsigned long message)
|
|
{
|
|
if (unlikely(ptrace_event_enabled(current, event))) {
|
|
current->ptrace_message = message;
|
|
ptrace_notify((event << 8) | SIGTRAP);
|
|
} else if (event == PTRACE_EVENT_EXEC) {
|
|
/* legacy EXEC report via SIGTRAP */
|
|
if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
|
|
send_sig(SIGTRAP, current, 0);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* ptrace_init_task - initialize ptrace state for a new child
|
|
* @child: new child task
|
|
* @ptrace: true if child should be ptrace'd by parent's tracer
|
|
*
|
|
* This is called immediately after adding @child to its parent's children
|
|
* list. @ptrace is false in the normal case, and true to ptrace @child.
|
|
*
|
|
* Called with current's siglock and write_lock_irq(&tasklist_lock) held.
|
|
*/
|
|
static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
|
|
{
|
|
INIT_LIST_HEAD(&child->ptrace_entry);
|
|
INIT_LIST_HEAD(&child->ptraced);
|
|
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
|
atomic_set(&child->ptrace_bp_refcnt, 1);
|
|
#endif
|
|
child->jobctl = 0;
|
|
child->ptrace = 0;
|
|
child->parent = child->real_parent;
|
|
|
|
if (unlikely(ptrace) && current->ptrace) {
|
|
child->ptrace = current->ptrace;
|
|
__ptrace_link(child, current->parent);
|
|
|
|
if (child->ptrace & PT_SEIZED)
|
|
task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
|
|
else
|
|
sigaddset(&child->pending.signal, SIGSTOP);
|
|
|
|
set_tsk_thread_flag(child, TIF_SIGPENDING);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* ptrace_release_task - final ptrace-related cleanup of a zombie being reaped
|
|
* @task: task in %EXIT_DEAD state
|
|
*
|
|
* Called with write_lock(&tasklist_lock) held.
|
|
*/
|
|
static inline void ptrace_release_task(struct task_struct *task)
|
|
{
|
|
BUG_ON(!list_empty(&task->ptraced));
|
|
ptrace_unlink(task);
|
|
BUG_ON(!list_empty(&task->ptrace_entry));
|
|
}
|
|
|
|
#ifndef force_successful_syscall_return
/*
 * System call handlers that, upon successful completion, need to return a
 * negative value should call force_successful_syscall_return() right before
 * returning.  On architectures where the syscall convention provides for a
 * separate error flag (e.g., alpha, ia64, ppc{,64}, sparc{,64}, possibly
 * others), this macro can be used to ensure that the error flag will not get
 * set.  On architectures which do not support a separate error flag, the macro
 * is a no-op and the spurious error condition needs to be filtered out by some
 * other means (e.g., in user-level, by passing an extra argument to the
 * syscall handler, or something along those lines).
 */
#define force_successful_syscall_return() do { } while (0)
#endif
|
|
|
|
#ifndef is_syscall_success
/*
 * On most systems we can tell if a syscall is a success based on if the retval
 * is an error value.  On some systems like ia64 and powerpc they have different
 * indicators of success/failure and must define their own.
 *
 * This default evaluates to true when regs_return_value(regs), cast to
 * unsigned long, is not an IS_ERR_VALUE().
 */
#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
#endif
|
|
|
|
/*
 * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
 *
 * These do-nothing inlines are used when the arch does not
 * implement single-step.  The kerneldoc comments are here
 * to document the interface for all arch definitions.
 */

#ifndef arch_has_single_step
/**
 * arch_has_single_step - does this CPU support user-mode single-step?
 *
 * If this is defined, then there must be function declarations or
 * inlines for user_enable_single_step() and user_disable_single_step().
 * arch_has_single_step() should evaluate to nonzero iff the machine
 * supports instruction single-step for user mode.
 * It can be a constant or it can test a CPU feature bit.
 */
#define arch_has_single_step()		(0)

/**
 * user_enable_single_step - single-step in user-mode task
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_single_step() has returned nonzero.
 * Set @task so that when it returns to user mode, it will trap after the
 * next single instruction executes.  If arch_has_block_step() is defined,
 * this must clear the effects of user_enable_block_step() too.
 */
static inline void user_enable_single_step(struct task_struct *task)
{
	BUG();			/* This can never be called. */
}

/**
 * user_disable_single_step - cancel user-mode single-step
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * Clear @task of the effects of user_enable_single_step() and
 * user_enable_block_step().  This can be called whether or not either
 * of those was ever called on @task, and even if arch_has_single_step()
 * returned zero.
 */
static inline void user_disable_single_step(struct task_struct *task)
{
}
#else
extern void user_enable_single_step(struct task_struct *);
extern void user_disable_single_step(struct task_struct *);
#endif	/* arch_has_single_step */
|
|
|
|
#ifndef arch_has_block_step
/**
 * arch_has_block_step - does this CPU support user-mode block-step?
 *
 * If this is defined, then there must be a function declaration or inline
 * for user_enable_block_step(), and arch_has_single_step() must be defined
 * too.  arch_has_block_step() should evaluate to nonzero iff the machine
 * supports step-until-branch for user mode.  It can be a constant or it
 * can test a CPU feature bit.
 */
#define arch_has_block_step()		(0)

/**
 * user_enable_block_step - step until branch in user-mode task
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_block_step() has returned nonzero,
 * and will never be called when single-instruction stepping is being used.
 * Set @task so that when it returns to user mode, it will trap after the
 * next branch or trap taken.
 */
static inline void user_enable_block_step(struct task_struct *task)
{
	BUG();			/* This can never be called. */
}
#else
extern void user_enable_block_step(struct task_struct *);
#endif	/* arch_has_block_step */
|
|
|
|
#ifdef ARCH_HAS_USER_SINGLE_STEP_INFO
|
|
extern void user_single_step_siginfo(struct task_struct *tsk,
|
|
struct pt_regs *regs, siginfo_t *info);
|
|
#else
|
|
static inline void user_single_step_siginfo(struct task_struct *tsk,
|
|
struct pt_regs *regs, siginfo_t *info)
|
|
{
|
|
memset(info, 0, sizeof(*info));
|
|
info->si_signo = SIGTRAP;
|
|
}
|
|
#endif
|
|
|
|
#ifndef arch_ptrace_stop_needed
/**
 * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
 * @code:	current->exit_code value ptrace will stop with
 * @info:	siginfo_t pointer (or %NULL) for signal ptrace will stop with
 *
 * This is called with the siglock held, to decide whether or not it's
 * necessary to release the siglock and call arch_ptrace_stop() with the
 * same @code and @info arguments.  It can be defined to a constant if
 * arch_ptrace_stop() is never required, or always is.  On machines where
 * this makes sense, it should be defined to a quick test to optimize out
 * calling arch_ptrace_stop() when it would be superfluous.  For example,
 * if the thread has not been back to user mode since the last stop, the
 * thread state might indicate that nothing needs to be done.
 *
 * This default ignores both arguments and always evaluates to 0.
 */
#define arch_ptrace_stop_needed(code, info)	(0)
#endif
|
|
|
|
#ifndef arch_ptrace_stop
/**
 * arch_ptrace_stop - Do machine-specific work before stopping for ptrace
 * @code:	current->exit_code value ptrace will stop with
 * @info:	siginfo_t pointer (or %NULL) for signal ptrace will stop with
 *
 * This is called with no locks held when arch_ptrace_stop_needed() has
 * just returned nonzero.  It is allowed to block, e.g. for user memory
 * access.  The arch can have machine-specific work to be done before
 * ptrace stops.  On ia64, register backing store gets written back to user
 * memory here.  Since this can be costly (requires dropping the siglock),
 * we only do it when the arch requires it for this particular stop, as
 * indicated by arch_ptrace_stop_needed().
 *
 * This default is a no-op.
 */
#define arch_ptrace_stop(code, info)		do { } while (0)
#endif
|
|
|
|
/* Prototype only; the definition is provided elsewhere. */
extern int task_current_syscall(struct task_struct *target, long *callno,
				unsigned long args[6], unsigned int maxargs,
				unsigned long *sp, unsigned long *pc);
|
|
|
|
/*
 * With CONFIG_HAVE_HW_BREAKPOINT both helpers are only declared here.
 * Without it, ptrace_put_breakpoints() is an empty inline and
 * ptrace_get_breakpoints() is not declared at all.
 */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
extern int ptrace_get_breakpoints(struct task_struct *tsk);
extern void ptrace_put_breakpoints(struct task_struct *tsk);
#else
static inline void ptrace_put_breakpoints(struct task_struct *tsk) { }
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif
|