kernel-ark/include/net/route.h
Eric Dumazet 29e75252da [IPV4] route cache: Introduce rt_genid for smooth cache invalidation
Current ip route cache implementation is not suited to large caches.

We can consume a lot of CPU when cache must be invalidated, since we
currently need to evict all cache entries, and this eviction is
sometimes asynchronous. min_delay & max_delay can somewhat control this
asynchronism behavior, but whole thing is a kludge, regularly triggering
infamous soft lockup messages. When entries are still in use, this also
consumes a lot of ram, filling dst_garbage.list.

A better scheme is to use a generation identifier on each entry,
so that cache invalidation can be performed by changing the table
identifier, without having to scan all entries.
No more delayed flushing, no more stalling when secret_interval expires.

Invalidated entries will then be freed at GC time (controled by
ip_rt_gc_timeout or stress), or when an invalidated entry is found
in a chain when an insert is done.
Thus we keep a normal equilibrium.

This patch :
- renames rt_hash_rnd to rt_genid (and makes it an atomic_t)
- Adds a new rt_genid field to 'struct rtable' (filling a hole on 64bit)
- Checks entry->rt_genid at appropriate places :
2008-01-31 19:28:27 -08:00

211 lines
5.6 KiB
C

/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Definitions for the IP router.
*
* Version: @(#)route.h 1.0.4 05/27/93
*
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Fixes:
* Alan Cox : Reformatted. Added ip_rt_local()
* Alan Cox : Support for TCP parameters.
* Alexey Kuznetsov: Major changes for new routing code.
* Mike McLagan : Routing by source
* Robert Olsson : Added rt_cache statistics
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _ROUTE_H
#define _ROUTE_H
#include <net/dst.h>
#include <net/inetpeer.h>
#include <net/flow.h>
#include <net/sock.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/route.h>
#include <linux/ip.h>
#include <linux/cache.h>
#include <linux/security.h>
#include <net/sock.h>
#ifndef __KERNEL__
#warning This file is not supposed to be used outside of kernel.
#endif
#define RTO_ONLINK 0x01
#define RTO_CONN 0
/* RTO_CONN is not used (being alias for 0), but preserved not to break
* some modules referring to it. */
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
struct fib_nh;
struct inet_peer;
struct rtable
{
union
{
struct dst_entry dst;
} u;
/* Cache lookup keys */
struct flowi fl;
struct in_device *idev;
int rt_genid;
unsigned rt_flags;
__u16 rt_type;
__be32 rt_dst; /* Path destination */
__be32 rt_src; /* Path source */
int rt_iif;
/* Info on neighbour */
__be32 rt_gateway;
/* Miscellaneous cached information */
__be32 rt_spec_dst; /* RFC1122 specific destination */
struct inet_peer *peer; /* long-living peer info */
};
struct ip_rt_acct
{
__u32 o_bytes;
__u32 o_packets;
__u32 i_bytes;
__u32 i_packets;
};
struct rt_cache_stat
{
unsigned int in_hit;
unsigned int in_slow_tot;
unsigned int in_slow_mc;
unsigned int in_no_route;
unsigned int in_brd;
unsigned int in_martian_dst;
unsigned int in_martian_src;
unsigned int out_hit;
unsigned int out_slow_tot;
unsigned int out_slow_mc;
unsigned int gc_total;
unsigned int gc_ignored;
unsigned int gc_goal_miss;
unsigned int gc_dst_overflow;
unsigned int in_hlist_search;
unsigned int out_hlist_search;
};
extern struct ip_rt_acct *ip_rt_acct;
struct in_device;
extern int ip_rt_init(void);
extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
__be32 src, struct net_device *dev);
extern void rt_cache_flush(int how);
extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu);
extern void ip_rt_send_redirect(struct sk_buff *skb);
extern unsigned inet_addr_type(struct net *net, __be32 addr);
extern unsigned inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr);
extern void ip_rt_multicast_event(struct in_device *);
extern int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);
extern void ip_rt_get_source(u8 *src, struct rtable *rt);
extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb);
struct in_ifaddr;
extern void fib_add_ifaddr(struct in_ifaddr *);
static inline void ip_rt_put(struct rtable * rt)
{
if (rt)
dst_release(&rt->u.dst);
}
#define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
extern const __u8 ip_tos2prio[16];
static inline char rt_tos2priority(u8 tos)
{
return ip_tos2prio[IPTOS_TOS(tos)>>1];
}
static inline int ip_route_connect(struct rtable **rp, __be32 dst,
__be32 src, u32 tos, int oif, u8 protocol,
__be16 sport, __be16 dport, struct sock *sk,
int flags)
{
struct flowi fl = { .oif = oif,
.mark = sk->sk_mark,
.nl_u = { .ip4_u = { .daddr = dst,
.saddr = src,
.tos = tos } },
.proto = protocol,
.uli_u = { .ports =
{ .sport = sport,
.dport = dport } } };
int err;
struct net *net = sk->sk_net;
if (!dst || !src) {
err = __ip_route_output_key(net, rp, &fl);
if (err)
return err;
fl.fl4_dst = (*rp)->rt_dst;
fl.fl4_src = (*rp)->rt_src;
ip_rt_put(*rp);
*rp = NULL;
}
security_sk_classify_flow(sk, &fl);
return ip_route_output_flow(net, rp, &fl, sk, flags);
}
static inline int ip_route_newports(struct rtable **rp, u8 protocol,
__be16 sport, __be16 dport, struct sock *sk)
{
if (sport != (*rp)->fl.fl_ip_sport ||
dport != (*rp)->fl.fl_ip_dport) {
struct flowi fl;
memcpy(&fl, &(*rp)->fl, sizeof(fl));
fl.fl_ip_sport = sport;
fl.fl_ip_dport = dport;
fl.proto = protocol;
ip_rt_put(*rp);
*rp = NULL;
security_sk_classify_flow(sk, &fl);
return ip_route_output_flow(sk->sk_net, rp, &fl, sk, 0);
}
return 0;
}
extern void rt_bind_peer(struct rtable *rt, int create);
static inline struct inet_peer *rt_get_peer(struct rtable *rt)
{
if (rt->peer)
return rt->peer;
rt_bind_peer(rt, 0);
return rt->peer;
}
extern ctl_table ipv4_route_table[];
#endif /* _ROUTE_H */