kernel-ark/include/asm-ia64/spinlock.h
Christoph Lameter f521089158 [IA64] Spinlock optimizations
1. Nontemporal store for spin unlock.

A nontemporal store will not update the LRU setting for the cacheline. The
cacheline with the lock may therefore be evicted faster from the cpu
caches. Doing so may be useful since it increases the chance that the
exclusive cache line has been evicted when another cpu is trying to
acquire the lock.

The time between dropping and reacquiring a lock on the same cpu is
typically very small so the danger of the cacheline being
evicted is negligible.

2. Avoid semaphore operation in write_unlock and use nontemporal store

write_lock uses a cmpxchg like the regular spin_lock but write_unlock uses
clear_bit which requires a load and then a loop over a cmpxchg. The
following patch makes write_unlock simply use a nontemporal store to clear
the highest 8 bits. We will then still have the lower 3 bytes (24 bits)
left to count the readers.

Doing the byte store will reduce the number of possible readers from 2^31
to 2^24 = 16 million.

These patches were discussed already:

http://marc.theaimsgroup.com/?t=111472054400001&r=1&w=2
http://marc.theaimsgroup.com/?l=linux-ia64&m=111401837707849&w=2

The nontemporal stores will only work using GCC. If a compiler is used
that does not support inline asm then fallback C code is used. This
will preserve the byte store but not be able to do the nontemporal stores.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-08-10 16:13:10 -07:00

224 lines
6.7 KiB
C

#ifndef _ASM_IA64_SPINLOCK_H
#define _ASM_IA64_SPINLOCK_H
/*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
*
* This file is used for SMP configurations only.
*/
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <asm/atomic.h>
#include <asm/bitops.h>
#include <asm/intrinsics.h>
#include <asm/system.h>
typedef struct {
volatile unsigned int lock;
#ifdef CONFIG_PREEMPT
unsigned int break_lock;
#endif
} spinlock_t;
#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
#define spin_lock_init(x) ((x)->lock = 0)
#ifdef ASM_SUPPORTED
/*
* Try to get the lock. If we fail to get the lock, make a non-standard call to
* ia64_spinlock_contention(). We do not use a normal call because that would force all
* callers of spin_lock() to be non-leaf routines. Instead, ia64_spinlock_contention() is
* carefully coded to touch only those registers that spin_lock() marks "clobbered".
*/
#define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory"
static inline void
_raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
{
register volatile unsigned int *ptr asm ("r31") = &lock->lock;
#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
# ifdef CONFIG_ITANIUM
/* don't use brl on Itanium... */
asm volatile ("{\n\t"
" mov ar.ccv = r0\n\t"
" mov r28 = ip\n\t"
" mov r30 = 1;;\n\t"
"}\n\t"
"cmpxchg4.acq r30 = [%1], r30, ar.ccv\n\t"
"movl r29 = ia64_spinlock_contention_pre3_4;;\n\t"
"cmp4.ne p14, p0 = r30, r0\n\t"
"mov b6 = r29;;\n\t"
"mov r27=%2\n\t"
"(p14) br.cond.spnt.many b6"
: "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
# else
asm volatile ("{\n\t"
" mov ar.ccv = r0\n\t"
" mov r28 = ip\n\t"
" mov r30 = 1;;\n\t"
"}\n\t"
"cmpxchg4.acq r30 = [%1], r30, ar.ccv;;\n\t"
"cmp4.ne p14, p0 = r30, r0\n\t"
"mov r27=%2\n\t"
"(p14) brl.cond.spnt.many ia64_spinlock_contention_pre3_4;;"
: "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
# endif /* CONFIG_MCKINLEY */
#else
# ifdef CONFIG_ITANIUM
/* don't use brl on Itanium... */
/* mis-declare, so we get the entry-point, not it's function descriptor: */
asm volatile ("mov r30 = 1\n\t"
"mov r27=%2\n\t"
"mov ar.ccv = r0;;\n\t"
"cmpxchg4.acq r30 = [%0], r30, ar.ccv\n\t"
"movl r29 = ia64_spinlock_contention;;\n\t"
"cmp4.ne p14, p0 = r30, r0\n\t"
"mov b6 = r29;;\n\t"
"(p14) br.call.spnt.many b6 = b6"
: "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
# else
asm volatile ("mov r30 = 1\n\t"
"mov r27=%2\n\t"
"mov ar.ccv = r0;;\n\t"
"cmpxchg4.acq r30 = [%0], r30, ar.ccv;;\n\t"
"cmp4.ne p14, p0 = r30, r0\n\t"
"(p14) brl.call.spnt.many b6=ia64_spinlock_contention;;"
: "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
# endif /* CONFIG_MCKINLEY */
#endif
}
#define _raw_spin_lock(lock) _raw_spin_lock_flags(lock, 0)
/* Unlock by doing an ordered store and releasing the cacheline with nta */
static inline void _raw_spin_unlock(spinlock_t *x) {
barrier();
asm volatile ("st4.rel.nta [%0] = r0\n\t" :: "r"(x));
}
#else /* !ASM_SUPPORTED */
#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
# define _raw_spin_lock(x) \
do { \
__u32 *ia64_spinlock_ptr = (__u32 *) (x); \
__u64 ia64_spinlock_val; \
ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0); \
if (unlikely(ia64_spinlock_val)) { \
do { \
while (*ia64_spinlock_ptr) \
ia64_barrier(); \
ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0); \
} while (ia64_spinlock_val); \
} \
} while (0)
#define _raw_spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0)
#endif /* !ASM_SUPPORTED */
#define spin_is_locked(x) ((x)->lock != 0)
#define _raw_spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0)
#define spin_unlock_wait(x) do { barrier(); } while ((x)->lock)
typedef struct {
volatile unsigned int read_counter : 24;
volatile unsigned int write_lock : 8;
#ifdef CONFIG_PREEMPT
unsigned int break_lock;
#endif
} rwlock_t;
#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
#define read_can_lock(rw) (*(volatile int *)(rw) >= 0)
#define write_can_lock(rw) (*(volatile int *)(rw) == 0)
#define _raw_read_lock(rw) \
do { \
rwlock_t *__read_lock_ptr = (rw); \
\
while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) { \
ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \
while (*(volatile int *)__read_lock_ptr < 0) \
cpu_relax(); \
} \
} while (0)
#define _raw_read_unlock(rw) \
do { \
rwlock_t *__read_lock_ptr = (rw); \
ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \
} while (0)
#ifdef ASM_SUPPORTED
#define _raw_write_lock(rw) \
do { \
__asm__ __volatile__ ( \
"mov ar.ccv = r0\n" \
"dep r29 = -1, r0, 31, 1;;\n" \
"1:\n" \
"ld4 r2 = [%0];;\n" \
"cmp4.eq p0,p7 = r0,r2\n" \
"(p7) br.cond.spnt.few 1b \n" \
"cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n" \
"cmp4.eq p0,p7 = r0, r2\n" \
"(p7) br.cond.spnt.few 1b;;\n" \
:: "r"(rw) : "ar.ccv", "p7", "r2", "r29", "memory"); \
} while(0)
#define _raw_write_trylock(rw) \
({ \
register long result; \
\
__asm__ __volatile__ ( \
"mov ar.ccv = r0\n" \
"dep r29 = -1, r0, 31, 1;;\n" \
"cmpxchg4.acq %0 = [%1], r29, ar.ccv\n" \
: "=r"(result) : "r"(rw) : "ar.ccv", "r29", "memory"); \
(result == 0); \
})
static inline void _raw_write_unlock(rwlock_t *x)
{
u8 *y = (u8 *)x;
barrier();
asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" );
}
#else /* !ASM_SUPPORTED */
#define _raw_write_lock(l) \
({ \
__u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1); \
__u32 *ia64_write_lock_ptr = (__u32 *) (l); \
do { \
while (*ia64_write_lock_ptr) \
ia64_barrier(); \
ia64_val = ia64_cmpxchg4_acq(ia64_write_lock_ptr, ia64_set_val, 0); \
} while (ia64_val); \
})
#define _raw_write_trylock(rw) \
({ \
__u64 ia64_val; \
__u64 ia64_set_val = ia64_dep_mi(-1, 0, 31,1); \
ia64_val = ia64_cmpxchg4_acq((__u32 *)(rw), ia64_set_val, 0); \
(ia64_val == 0); \
})
static inline void _raw_write_unlock(rwlock_t *x)
{
barrier();
x->write_lock = 0;
}
#endif /* !ASM_SUPPORTED */
#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
#endif /* _ASM_IA64_SPINLOCK_H */