kernel-ark/arch/x86/lib/delay.c
Pallipadi, Venkatesh e888d7facd x86, delay: tsc based udelay should have rdtsc_barrier
delay_tsc needs rdtsc_barrier to provide proper delay.

Output from a test driver using hpet to cross check delay
provided by udelay().

Before:
[   86.794363] Expected delay 5us actual 4679ns
[   87.154362] Expected delay 5us actual 698ns
[   87.514162] Expected delay 5us actual 4539ns
[   88.653716] Expected delay 5us actual 4539ns
[   94.664106] Expected delay 10us actual 9638ns
[   95.049351] Expected delay 10us actual 10126ns
[   95.416110] Expected delay 10us actual 9568ns
[   95.799216] Expected delay 10us actual 9638ns
[  103.624104] Expected delay 10us actual 9707ns
[  104.020619] Expected delay 10us actual 768ns
[  104.419951] Expected delay 10us actual 9707ns

After:
[   50.983320] Expected delay 5us actual 5587ns
[   51.261807] Expected delay 5us actual 5587ns
[   51.565715] Expected delay 5us actual 5657ns
[   51.861171] Expected delay 5us actual 5587ns
[   52.164704] Expected delay 5us actual 5726ns
[   52.487457] Expected delay 5us actual 5657ns
[   52.789338] Expected delay 5us actual 5726ns
[   57.119680] Expected delay 10us actual 10755ns
[   57.893997] Expected delay 10us actual 10615ns
[   58.261287] Expected delay 10us actual 10755ns
[   58.620505] Expected delay 10us actual 10825ns
[   58.941035] Expected delay 10us actual 10755ns
[   59.320903] Expected delay 10us actual 10615ns
[   61.306311] Expected delay 10us actual 10755ns
[   61.520542] Expected delay 10us actual 10615ns

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-06-25 16:47:40 -07:00

141 lines
2.8 KiB
C

/*
* Precise Delay Loops for i386
*
* Copyright (C) 1993 Linus Torvalds
* Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
* Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
*
* The __delay function must _NOT_ be inlined as its execution time
* depends wildly on alignment on many x86 processors. The additional
* jump magic is needed to get the timing stable on all the CPU's
* we have to worry about.
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif
/* simple loop based delay: */
static void delay_loop(unsigned long loops)
{
asm volatile(
" test %0,%0 \n"
" jz 3f \n"
" jmp 1f \n"
".align 16 \n"
"1: jmp 2f \n"
".align 16 \n"
"2: dec %0 \n"
" jnz 2b \n"
"3: dec %0 \n"
: /* we don't need output */
:"a" (loops)
);
}
/* TSC based delay: */
static void delay_tsc(unsigned long loops)
{
unsigned long bclock, now;
int cpu;
preempt_disable();
cpu = smp_processor_id();
rdtsc_barrier();
rdtscl(bclock);
for (;;) {
rdtsc_barrier();
rdtscl(now);
if ((now - bclock) >= loops)
break;
/* Allow RT tasks to run */
preempt_enable();
rep_nop();
preempt_disable();
/*
* It is possible that we moved to another CPU, and
* since TSC's are per-cpu we need to calculate
* that. The delay must guarantee that we wait "at
* least" the amount of time. Being moved to another
* CPU could make the wait longer but we just need to
* make sure we waited long enough. Rebalance the
* counter for this CPU.
*/
if (unlikely(cpu != smp_processor_id())) {
loops -= (now - bclock);
cpu = smp_processor_id();
rdtsc_barrier();
rdtscl(bclock);
}
}
preempt_enable();
}
/*
* Since we calibrate only once at boot, this
* function should be set once at boot and not changed
*/
static void (*delay_fn)(unsigned long) = delay_loop;
void use_tsc_delay(void)
{
delay_fn = delay_tsc;
}
int __devinit read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
rdtscll(*timer_val);
return 0;
}
return -1;
}
void __delay(unsigned long loops)
{
delay_fn(loops);
}
EXPORT_SYMBOL(__delay);
inline void __const_udelay(unsigned long xloops)
{
int d0;
xloops *= 4;
asm("mull %%edx"
:"=d" (xloops), "=&a" (d0)
:"1" (xloops), "0"
(cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);
void __udelay(unsigned long usecs)
{
__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);
void __ndelay(unsigned long nsecs)
{
__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);