kernel-ark/include/linux/hrtimer.h
Oleg Nesterov 080344b988 hrtimer: fix *rmtp handling in hrtimer_nanosleep()
Spotted by Pavel Emelyanov and Alexey Dobriyan.

hrtimer_nanosleep() sets restart_block->arg1 = rmtp, but this rmtp points to
the local variable which lives in the caller's stack frame. This means that
if sys_restart_syscall() actually happens and it is interrupted as well, we
don't update the user-space variable, but write into the already dead stack
frame.

Introduced by commit 04c227140f
hrtimer: Rework hrtimer_nanosleep to make sys_compat_nanosleep easier

Change the callers to pass "__user *rmtp" to hrtimer_nanosleep(), and change
hrtimer_nanosleep() to use copy_to_user() to actually update *rmtp.

Small problem remains. man 2 nanosleep states that *rtmp should be written if
nanosleep() was interrupted (it says nothing whether it is OK to update *rmtp
if nanosleep returns 0), but (with or without this patch) we can dirty *rem
even if nanosleep() returns 0.

NOTE: this patch doesn't change compat_sys_nanosleep(), because it has other
bugs. Fixed by the next patch.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Alexey Dobriyan <adobriyan@sw.ru>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Pavel Emelyanov <xemul@sw.ru>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Toyo Abe <toyoa@mvista.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

 include/linux/hrtimer.h |    2 -
 kernel/hrtimer.c        |   51 +++++++++++++++++++++++++-----------------------
 kernel/posix-timers.c   |   14 +------------
 3 files changed, 30 insertions(+), 37 deletions(-)
2008-02-10 10:48:03 +01:00

385 lines
11 KiB
C

/*
* include/linux/hrtimer.h
*
* hrtimers - High-resolution kernel timers
*
* Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005, Red Hat, Inc., Ingo Molnar
*
* data type definitions, declarations, prototypes
*
* Started by: Thomas Gleixner and Ingo Molnar
*
* For licencing details see kernel-base/COPYING
*/
#ifndef _LINUX_HRTIMER_H
#define _LINUX_HRTIMER_H
#include <linux/rbtree.h>
#include <linux/ktime.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/wait.h>
struct hrtimer_clock_base;
struct hrtimer_cpu_base;
/*
* Mode arguments of xxx_hrtimer functions:
*/
enum hrtimer_mode {
HRTIMER_MODE_ABS, /* Time value is absolute */
HRTIMER_MODE_REL, /* Time value is relative to now */
};
/*
* Return values for the callback function
*/
enum hrtimer_restart {
HRTIMER_NORESTART, /* Timer is not restarted */
HRTIMER_RESTART, /* Timer must be restarted */
};
/*
* hrtimer callback modes:
*
* HRTIMER_CB_SOFTIRQ: Callback must run in softirq context
* HRTIMER_CB_IRQSAFE: Callback may run in hardirq context
* HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and
* does not restart the timer
* HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context
* Special mode for tick emultation
*/
enum hrtimer_cb_mode {
HRTIMER_CB_SOFTIRQ,
HRTIMER_CB_IRQSAFE,
HRTIMER_CB_IRQSAFE_NO_RESTART,
HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,
};
/*
* Values to track state of the timer
*
* Possible states:
*
* 0x00 inactive
* 0x01 enqueued into rbtree
* 0x02 callback function running
* 0x04 callback pending (high resolution mode)
*
* Special case:
* 0x03 callback function running and enqueued
* (was requeued on another CPU)
* The "callback function running and enqueued" status is only possible on
* SMP. It happens for example when a posix timer expired and the callback
* queued a signal. Between dropping the lock which protects the posix timer
* and reacquiring the base lock of the hrtimer, another CPU can deliver the
* signal and rearm the timer. We have to preserve the callback running state,
* as otherwise the timer could be removed before the softirq code finishes the
* the handling of the timer.
*
* The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state to
* preserve the HRTIMER_STATE_CALLBACK bit in the above scenario.
*
* All state transitions are protected by cpu_base->lock.
*/
#define HRTIMER_STATE_INACTIVE 0x00
#define HRTIMER_STATE_ENQUEUED 0x01
#define HRTIMER_STATE_CALLBACK 0x02
#define HRTIMER_STATE_PENDING 0x04
/**
* struct hrtimer - the basic hrtimer structure
* @node: red black tree node for time ordered insertion
* @expires: the absolute expiry time in the hrtimers internal
* representation. The time is related to the clock on
* which the timer is based.
* @function: timer expiry callback function
* @base: pointer to the timer base (per cpu and per clock)
* @state: state information (See bit values above)
* @cb_mode: high resolution timer feature to select the callback execution
* mode
* @cb_entry: list head to enqueue an expired timer into the callback list
* @start_site: timer statistics field to store the site where the timer
* was started
* @start_comm: timer statistics field to store the name of the process which
* started the timer
* @start_pid: timer statistics field to store the pid of the task which
* started the timer
*
* The hrtimer structure must be initialized by hrtimer_init()
*/
struct hrtimer {
struct rb_node node;
ktime_t expires;
enum hrtimer_restart (*function)(struct hrtimer *);
struct hrtimer_clock_base *base;
unsigned long state;
enum hrtimer_cb_mode cb_mode;
struct list_head cb_entry;
#ifdef CONFIG_TIMER_STATS
void *start_site;
char start_comm[16];
int start_pid;
#endif
};
/**
* struct hrtimer_sleeper - simple sleeper structure
* @timer: embedded timer structure
* @task: task to wake up
*
* task is set to NULL, when the timer expires.
*/
struct hrtimer_sleeper {
struct hrtimer timer;
struct task_struct *task;
};
/**
* struct hrtimer_clock_base - the timer base for a specific clock
* @cpu_base: per cpu clock base
* @index: clock type index for per_cpu support when moving a
* timer to a base on another cpu.
* @active: red black tree root node for the active timers
* @first: pointer to the timer node which expires first
* @resolution: the resolution of the clock, in nanoseconds
* @get_time: function to retrieve the current time of the clock
* @get_softirq_time: function to retrieve the current time from the softirq
* @softirq_time: the time when running the hrtimer queue in the softirq
* @offset: offset of this clock to the monotonic base
* @reprogram: function to reprogram the timer event
*/
struct hrtimer_clock_base {
struct hrtimer_cpu_base *cpu_base;
clockid_t index;
struct rb_root active;
struct rb_node *first;
ktime_t resolution;
ktime_t (*get_time)(void);
ktime_t (*get_softirq_time)(void);
ktime_t softirq_time;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t offset;
int (*reprogram)(struct hrtimer *t,
struct hrtimer_clock_base *b,
ktime_t n);
#endif
};
#define HRTIMER_MAX_CLOCK_BASES 2
/*
* struct hrtimer_cpu_base - the per cpu clock bases
* @lock: lock protecting the base and associated clock bases
* and timers
* @lock_key: the lock_class_key for use with lockdep
* @clock_base: array of clock bases for this cpu
* @curr_timer: the timer which is executing a callback right now
* @expires_next: absolute time of the next event which was scheduled
* via clock_set_next_event()
* @hres_active: State of high resolution mode
* @check_clocks: Indictator, when set evaluate time source and clock
* event devices whether high resolution mode can be
* activated.
* @cb_pending: Expired timers are moved from the rbtree to this
* list in the timer interrupt. The list is processed
* in the softirq.
* @nr_events: Total number of timer interrupt events
*/
struct hrtimer_cpu_base {
spinlock_t lock;
struct lock_class_key lock_key;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
struct list_head cb_pending;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
int hres_active;
unsigned long nr_events;
#endif
};
#ifdef CONFIG_HIGH_RES_TIMERS
struct clock_event_device;
extern void clock_was_set(void);
extern void hres_timers_resume(void);
extern void hrtimer_interrupt(struct clock_event_device *dev);
/*
* In high resolution mode the time reference must be read accurate
*/
static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
{
return timer->base->get_time();
}
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
{
return timer->base->cpu_base->hres_active;
}
/*
* The resolution of the clocks. The resolution value is returned in
* the clock_getres() system call to give application programmers an
* idea of the (in)accuracy of timers. Timer values are rounded up to
* this resolution values.
*/
# define HIGH_RES_NSEC 1
# define KTIME_HIGH_RES (ktime_t) { .tv64 = HIGH_RES_NSEC }
# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
#else
# define MONOTONIC_RES_NSEC LOW_RES_NSEC
# define KTIME_MONOTONIC_RES KTIME_LOW_RES
/*
* clock_was_set() is a NOP for non- high-resolution systems. The
* time-sorted order guarantees that a timer does not expire early and
* is expired in the next softirq when the clock was advanced.
*/
static inline void clock_was_set(void) { }
static inline void hres_timers_resume(void) { }
/*
* In non high resolution mode the time reference is taken from
* the base softirq time variable.
*/
static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
{
return timer->base->softirq_time;
}
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
{
return 0;
}
#endif
extern ktime_t ktime_get(void);
extern ktime_t ktime_get_real(void);
/* Exported timer functions: */
/* Initialize timers: */
extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
enum hrtimer_mode mode);
/* Basic timer operations: */
extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
const enum hrtimer_mode mode);
extern int hrtimer_cancel(struct hrtimer *timer);
extern int hrtimer_try_to_cancel(struct hrtimer *timer);
static inline int hrtimer_restart(struct hrtimer *timer)
{
return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
}
/* Query timers: */
extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
extern ktime_t hrtimer_get_next_event(void);
/*
* A timer is active, when it is enqueued into the rbtree or the callback
* function is running.
*/
static inline int hrtimer_active(const struct hrtimer *timer)
{
return timer->state != HRTIMER_STATE_INACTIVE;
}
/*
* Helper function to check, whether the timer is on one of the queues
*/
static inline int hrtimer_is_queued(struct hrtimer *timer)
{
return timer->state &
(HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING);
}
/* Forward a hrtimer so it expires after now: */
extern u64
hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
/* Forward a hrtimer so it expires after the hrtimer's current now */
static inline u64 hrtimer_forward_now(struct hrtimer *timer,
ktime_t interval)
{
return hrtimer_forward(timer, timer->base->get_time(), interval);
}
/* Precise sleep: */
extern long hrtimer_nanosleep(struct timespec *rqtp,
struct timespec __user *rmtp,
const enum hrtimer_mode mode,
const clockid_t clockid);
extern long hrtimer_nanosleep_restart(struct restart_block *restart_block);
extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
struct task_struct *tsk);
/* Soft interrupt function to run the hrtimer queues: */
extern void hrtimer_run_queues(void);
extern void hrtimer_run_pending(void);
/* Bootup initialization: */
extern void __init hrtimers_init(void);
#if BITS_PER_LONG < 64
extern u64 ktime_divns(const ktime_t kt, s64 div);
#else /* BITS_PER_LONG < 64 */
# define ktime_divns(kt, div) (u64)((kt).tv64 / (div))
#endif
/* Show pending timers: */
extern void sysrq_timer_list_show(void);
/*
* Timer-statistics info:
*/
#ifdef CONFIG_TIMER_STATS
extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
void *timerf, char *comm,
unsigned int timer_flag);
static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
{
timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
timer->function, timer->start_comm, 0);
}
extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer,
void *addr);
static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
{
__timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0));
}
static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
{
timer->start_site = NULL;
}
#else
static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
{
}
static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
{
}
static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
{
}
#endif
#endif