kernel-ark/include/linux/sunrpc/cache.h
Greg Banks 7b2b1fee30 [PATCH] knfsd: knfsd: cache ipmap per TCP socket
Speed up high call-rate workloads by caching the struct ip_map for the peer on
the connected struct svc_sock instead of looking it up in the ip_map cache
hashtable on every call.  This helps workloads using AUTH_SYS authentication
over TCP.

Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16
synthetic client threads simulating an rsync (i.e.  recursive directory
listing) workload reading from an i386 RH9 install image (161480 regular files
in 10841 directories) on the server.  That tree is small enough to fill in the
server's RAM so no disk traffic was involved.  This setup gives a sustained
call rate in excess of 60000 calls/sec before being CPU-bound on the server.

Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each
CPU, and ip_map_lookup() was taking 2.9%.  This patch drops both contributions
into the profile noise.

Note that the above result overstates the value of this patch for most
workloads.  The synthetic clients are all using separate IP addresses, so
there are 64 entries in the ip_map cache hash.  Because the kernel measured
contained the bug fixed in commit

commit 1f1e030bf7

and was running on a 64-bit little-endian machine, probably all of those 64
entries were on a single chain, thus increasing the cost of ip_map_lookup().

With a modern kernel you would need more clients to see the same amount of
performance improvement.  This patch has helped to scale knfsd to handle a
deployment with 2000 NFS clients.

Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 07:55:16 -07:00

214 lines
6.3 KiB
C

/*
* include/linux/sunrpc/cache.h
*
* Generic code for various authentication-related caches
* used by sunrpc clients and servers.
*
* Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
*
* Released under terms in GPL version 2. See COPYING.
*
*/
#ifndef _LINUX_SUNRPC_CACHE_H_
#define _LINUX_SUNRPC_CACHE_H_
#include <linux/slab.h>
#include <asm/atomic.h>
#include <linux/proc_fs.h>
/*
* Each cache requires:
* - A 'struct cache_detail' which contains information specific to the cache
* for common code to use.
* - An item structure that must contain a "struct cache_head"
* - A lookup function defined using DefineCacheLookup
* - A 'put' function that can release a cache item. It will only
* be called after cache_put has succeeded, so there are guaranteed
* to be no references.
* - A function to calculate a hash of an item's key.
*
* as well as assorted code fragments (e.g. compare keys) and numbers
* (e.g. hash size, goal_age, etc).
*
* Each cache must be registered so that it can be cleaned regularly.
* When the cache is unregistered, it is flushed completely.
*
* Entries have a ref count and a 'hashed' flag which counts the existence
* in the hash table.
* We only expire entries when refcount is zero.
* Existence in the cache is counted in the refcount.
*/
/* Every cache item has a common header that is used
 * for expiring and refreshing entries.
 *
 * The header is embedded in a larger, cache-specific item structure;
 * cache_for_each() recovers the enclosing item with container_of().
 */
struct cache_head {
struct cache_head * next; /* next entry on the same hash chain (followed by cache_for_each) */
time_t expiry_time; /* After this time, don't use the data.
 * cache_valid() treats a value of 0 as "not valid".
 */
time_t last_refresh; /* If CACHE_PENDING, this is when upcall
 * was sent, else this is when update was received
 */
struct kref ref; /* reference count: taken by cache_get(), dropped by cache_put() */
unsigned long flags; /* flag bits; cache_valid() tests bit CACHE_VALID with test_bit() */
};
/* Flags for cache_head.flags. CACHE_VALID is used as a bit number (not a
 * mask) by cache_valid(); NOTE(review): CACHE_NEGATIVE and CACHE_PENDING
 * are presumably bit numbers as well - confirm against their users.
 */
#define CACHE_VALID 0 /* Entry contains valid data */
#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */
#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet */
/* Unlike the three values above, this one is a duration in seconds. */
#define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */
/* Per-cache descriptor: the hash table, its lock, and the callbacks
 * supplied by the cache owner for the common cache code to use.
 */
struct cache_detail {
struct module * owner;
int hash_size; /* number of chains in hash_table; cache_for_each() walks
 * indexes hash_size-1 down to 0
 */
struct cache_head ** hash_table; /* array of hash chain heads */
rwlock_t hash_lock; /* held for reading by cache_for_each() while it walks the table */
atomic_t inuse; /* active user-space update or lookup */
char *name;
/* Release function: cache_put() passes this to kref_put(). */
void (*cache_put)(struct kref *);
void (*cache_request)(struct cache_detail *cd,
struct cache_head *h,
char **bpp, int *blen);
int (*cache_parse)(struct cache_detail *,
char *buf, int len);
int (*cache_show)(struct seq_file *m,
struct cache_detail *cd,
struct cache_head *h);
/* NOTE(review): the exact contracts of alloc/match/init/update (in
 * particular the roles of 'orig' and 'new') are not visible in this
 * header - confirm against the cache implementation before relying
 * on them.
 */
struct cache_head * (*alloc)(void);
int (*match)(struct cache_head *orig, struct cache_head *new);
void (*init)(struct cache_head *orig, struct cache_head *new);
void (*update)(struct cache_head *orig, struct cache_head *new);
/* fields below this comment are for internal use
 * and should not be touched by cache owners
 */
time_t flush_time; /* flush all cache items with last_refresh
 * earlier than this */
struct list_head others;
time_t nextcheck; /* cache_put() lowers this to an entry's expiry_time
 * when that is earlier and the entry's refcount is <= 2
 */
int entries;
/* fields for communication over channel */
struct list_head queue;
struct proc_dir_entry *proc_ent;
struct proc_dir_entry *flush_ent, *channel_ent, *content_ent;
atomic_t readers; /* how many times /channel is open */
time_t last_close; /* if no readers, when did last close */
time_t last_warn; /* when we last warned about no readers */
void (*warn_no_listener)(struct cache_detail *cd);
};
/* this must be embedded in any request structure that
 * identifies an object that will want a callback on
 * a cache fill
 */
struct cache_req {
/* Callback that yields the cache_deferred_req for this request.
 * NOTE(review): when it is invoked, and whether it may return NULL,
 * is not visible in this header - confirm against the callers.
 */
struct cache_deferred_req *(*defer)(struct cache_req *req);
};
/* this must be embedded in a deferred_request that is being
 * delayed awaiting cache-fill
 */
struct cache_deferred_req {
struct list_head hash; /* on hash chain */
struct list_head recent; /* on fifo */
struct cache_head *item; /* cache item we wait on */
time_t recv_time;
void *owner; /* we might need to discard all deferred requests
 * owned by someone */
/* Callback for the deferred request.
 * NOTE(review): presumably invoked when the request is revisited, with
 * too_many non-zero when it is being dropped because too many requests
 * are deferred - confirm against the implementation.
 */
void (*revisit)(struct cache_deferred_req *req,
int too_many);
};
/* Generic lookup/update entry points; both are only declared here and
 * are defined elsewhere.
 * NOTE(review): 'hash' is presumably the index of the key's chain in
 * detail->hash_table, and the returned cache_head presumably carries a
 * reference the caller must drop with cache_put() - confirm against the
 * implementation.
 */
extern struct cache_head *
sunrpc_cache_lookup(struct cache_detail *detail,
struct cache_head *key, int hash);
extern struct cache_head *
sunrpc_cache_update(struct cache_detail *detail,
struct cache_head *new, struct cache_head *old, int hash);
/* Iterate over every entry on every hash chain of 'detail'.
 *
 * pos:    cursor; pointer to the item type that embeds the cache_head
 * detail: the struct cache_detail whose hash_table is walked
 * index:  integer lvalue used as the chain counter; it is set to
 *         hash_size and the chains are visited from hash_size-1 down to 0
 * member: name of the struct cache_head field inside *pos
 *
 * The outer loop takes detail->hash_lock for reading before the first
 * chain and releases it only when index has come back to 0, i.e. after
 * the last chain has been walked. The inner loop follows member.next
 * along one chain and stops when &pos->member is NULL, which is what
 * container_of() produces for a NULL chain pointer.
 *
 * This expands to two nested for loops, so a 'break' in the body leaves
 * only the inner loop (the walk continues with the next chain), and any
 * exit that skips the remaining outer iterations (goto, return) leaves
 * hash_lock read-held.
 */
#define cache_for_each(pos, detail, index, member) \
for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \
({if (index==0)read_unlock(&(detail)->hash_lock); index--;}); \
) \
for (pos = container_of((detail)->hash_table[index], typeof(*pos), member); \
&pos->member; \
pos = container_of(pos->member.next, typeof(*pos), member))
/* NOTE(review): presumably discards the deferred requests whose 'owner'
 * field (struct cache_deferred_req) equals the given pointer - confirm
 * against the implementation.
 */
extern void cache_clean_deferred(void *owner);
/* Take an additional reference on a cache entry.
 *
 * Increments h->ref through kref_get() and hands the same pointer back,
 * so the call can be used inline in an expression.
 */
static inline struct cache_head *cache_get(struct cache_head *h)
{
	struct kref *count = &h->ref;

	kref_get(count);
	return h;
}
/* Drop a reference on a cache entry.
 *
 * Before the reference is released, cd->nextcheck is pulled forward to
 * h->expiry_time when the refcount is at most 2 and the entry expires
 * earlier than the currently recorded nextcheck. The reference is then
 * dropped with kref_put(), with cd->cache_put as the release function.
 */
static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
{
	if (atomic_read(&h->ref.refcount) <= 2) {
		if (h->expiry_time < cd->nextcheck)
			cd->nextcheck = h->expiry_time;
	}

	kref_put(&h->ref, cd->cache_put);
}
/* Report whether a cache entry currently holds usable data.
 *
 * Returns 1 when the CACHE_VALID bit is set in h->flags and
 * h->expiry_time is non-zero, 0 otherwise.
 */
static inline int cache_valid(struct cache_head *h)
{
	/* An entry that has been unhashed and is waiting for its
	 * refcount to reach 0 has its expiry_time set to 0. Such an
	 * entry may still have CACHE_VALID set, but it must not be
	 * reported as valid here.
	 */
	if (h->expiry_time == 0)
		return 0;

	return test_bit(CACHE_VALID, &h->flags) ? 1 : 0;
}
/* The functions below are only declared here and are defined elsewhere;
 * the notes on them are limited to what this header shows.
 */
/* NOTE(review): return-value convention and reference handling of
 * cache_check() are not visible in this header - confirm against the
 * implementation.
 */
extern int cache_check(struct cache_detail *detail,
struct cache_head *h, struct cache_req *rqstp);
extern void cache_flush(void);
extern void cache_purge(struct cache_detail *detail);
/* Largest positive value of a signed 32-bit integer.
 * NOTE(review): presumably used as a "never" timestamp - confirm.
 */
#define NEVER (0x7FFFFFFF)
extern void cache_register(struct cache_detail *cd);
extern int cache_unregister(struct cache_detail *cd);
/* Helpers for the word-based text format.
 * get_int() relies on qword_get() returning a negative value on error
 * and 0 when there is no word.
 */
extern void qword_add(char **bpp, int *lp, char *str);
extern void qword_addhex(char **bpp, int *lp, char *buf, int blen);
extern int qword_get(char **bpp, char *dest, int bufsize);
/* Parse the next word at *bpp as an integer.
 *
 * The word is fetched with qword_get() into a local buffer and converted
 * with simple_strtol() using a base argument of 0.
 *
 * Returns 0 and stores the value in *anint on success, -EINVAL when
 * qword_get() reports an error or the word is not entirely consumed by
 * the conversion, and -ENOENT when qword_get() returns 0 (no word).
 * *anint is not written on failure.
 */
static inline int get_int(char **bpp, int *anint)
{
	char word[50];
	char *end;
	int value;
	int got = qword_get(bpp, word, sizeof(word));

	if (got < 0)
		return -EINVAL;
	if (got == 0)
		return -ENOENT;

	value = simple_strtol(word, &end, 0);
	if (*end != '\0')
		return -EINVAL;

	*anint = value;
	return 0;
}
/* Parse the next word at *bpp as an expiry time.
 *
 * Returns the integer read by get_int(), or 0 when get_int() fails or
 * the value is negative.
 */
static inline time_t get_expiry(char **bpp)
{
	int when;

	if (get_int(bpp, &when) != 0 || when < 0)
		return 0;

	return when;
}
#endif /* _LINUX_SUNRPC_CACHE_H_ */