4381548237
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed: 1) Add a new structure, called "struct socket_wq", to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq". 4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to "struct socket_wq". 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep. 6) Change sk_has_sleeper() to wq_has_sleeper(), which must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to: - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to wake up tasks if needed. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use RCU protection as well. 9) Exceptions: macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They don't need RCU freeing. Some cleanups or followups are probably needed (possibly sk_callback_lock conversion to a spinlock, for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
297 lines
9.6 KiB
C
297 lines
9.6 KiB
C
/*
 * NET		An implementation of the SOCKET network access protocol.
 *		This is the master header file for the Linux NET layer,
 *		or, in plain English: the networking handling part of the
 *		kernel.
 *
 * Version:	@(#)net.h	1.0.3	05/25/93
 *
 * Authors:	Orest Zborowski, <obz@Kodak.COM>
 *		Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
|
|
#ifndef _LINUX_NET_H
|
|
#define _LINUX_NET_H
|
|
|
|
#include <linux/socket.h>
|
|
#include <asm/socket.h>
|
|
|
|
/* NPROTO is an alias for AF_MAX. */
#define NPROTO AF_MAX
|
|
|
|
/*
 * Socket call numbers, one per socket-related system call (the call
 * each number stands for is named in the trailing comment).
 */
#define SYS_SOCKET	1		/* sys_socket(2)		*/
#define SYS_BIND	2		/* sys_bind(2)			*/
#define SYS_CONNECT	3		/* sys_connect(2)		*/
#define SYS_LISTEN	4		/* sys_listen(2)		*/
#define SYS_ACCEPT	5		/* sys_accept(2)		*/
#define SYS_GETSOCKNAME	6		/* sys_getsockname(2)		*/
#define SYS_GETPEERNAME	7		/* sys_getpeername(2)		*/
#define SYS_SOCKETPAIR	8		/* sys_socketpair(2)		*/
#define SYS_SEND	9		/* sys_send(2)			*/
#define SYS_RECV	10		/* sys_recv(2)			*/
#define SYS_SENDTO	11		/* sys_sendto(2)		*/
#define SYS_RECVFROM	12		/* sys_recvfrom(2)		*/
#define SYS_SHUTDOWN	13		/* sys_shutdown(2)		*/
#define SYS_SETSOCKOPT	14		/* sys_setsockopt(2)		*/
#define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
#define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
#define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
#define SYS_ACCEPT4	18		/* sys_accept4(2)		*/
#define SYS_RECVMMSG	19		/* sys_recvmmsg(2)		*/
|
|
|
|
/* Connection state of a socket; SS_FREE is 0 and the rest count up from it. */
typedef enum {
	SS_FREE = 0,			/* not allocated		*/
	SS_UNCONNECTED,			/* unconnected to any socket	*/
	SS_CONNECTING,			/* in process of connecting	*/
	SS_CONNECTED,			/* connected to socket		*/
	SS_DISCONNECTING		/* in process of disconnecting	*/
} socket_state;
|
|
|
|
#define __SO_ACCEPTCON (1 << 16) /* flag bit 16: socket has performed a listen */
|
|
|
|
#ifdef __KERNEL__
|
|
#include <linux/stringify.h>
|
|
#include <linux/random.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
|
|
#include <linux/kmemcheck.h>
|
|
#include <linux/rcupdate.h>
|
|
|
|
/* Forward declarations: only pointers to these types are used below. */
struct poll_table_struct;
struct pipe_inode_info;
struct inode;
struct net;
|
|
|
|
/*
 * Socket flag numbers (see the @flags member of struct socket).
 * NOTE(review): presumably bit indices into socket->flags rather than
 * masks - confirm against the users of these constants.
 */
#define SOCK_ASYNC_NOSPACE	0
#define SOCK_ASYNC_WAITDATA	1
#define SOCK_NOSPACE		2
#define SOCK_PASSCRED		3
#define SOCK_PASSSEC		4
|
|
|
|
#ifndef ARCH_HAS_SOCKET_TYPES
|
|
/**
 * enum sock_type - Socket types
 * @SOCK_STREAM: stream (connection) socket
 * @SOCK_DGRAM: datagram (conn.less) socket
 * @SOCK_RAW: raw socket
 * @SOCK_RDM: reliably-delivered message
 * @SOCK_SEQPACKET: sequential packet socket
 * @SOCK_DCCP: Datagram Congestion Control Protocol socket
 * @SOCK_PACKET: linux specific way of getting packets at the dev level.
 *		  For writing rarp and other similar things on the user level.
 *
 * When adding some new socket type please
 * grep ARCH_HAS_SOCKET_TYPE include/asm-* /socket.h, at least MIPS
 * overrides this enum for binary compat reasons.
 */
enum sock_type {
	SOCK_STREAM	= 1,
	SOCK_DGRAM	= 2,
	SOCK_RAW	= 3,
	SOCK_RDM	= 4,
	SOCK_SEQPACKET	= 5,
	SOCK_DCCP	= 6,
	SOCK_PACKET	= 10,
};
|
|
|
|
/* One past the highest socket type number (SOCK_PACKET). */
#define SOCK_MAX (SOCK_PACKET + 1)
/* Mask which covers at least up to SOCK_MAX-1.  The
 * remaining bits are used as flags. */
#define SOCK_TYPE_MASK 0xf
|
|
|
|
/*
 * Flags for socket, socketpair, accept4.
 * They reuse the O_CLOEXEC / O_NONBLOCK values; SOCK_NONBLOCK is only
 * defined here when nothing defined it earlier.
 */
#define SOCK_CLOEXEC	O_CLOEXEC
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK	O_NONBLOCK
#endif
|
|
|
|
#endif /* ARCH_HAS_SOCKET_TYPES */
|
|
|
|
/* "how" argument accepted by kernel_sock_shutdown(). */
enum sock_shutdown_cmd {
	SHUT_RD		= 0,
	SHUT_WR		= 1,
	SHUT_RDWR	= 2,
};
|
|
|
|
struct socket_wq {
|
|
wait_queue_head_t wait;
|
|
struct fasync_struct *fasync_list;
|
|
struct rcu_head rcu;
|
|
} ____cacheline_aligned_in_smp;
|
|
|
|
/**
|
|
* struct socket - general BSD socket
|
|
* @state: socket state (%SS_CONNECTED, etc)
|
|
* @type: socket type (%SOCK_STREAM, etc)
|
|
* @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
|
|
* @ops: protocol specific socket operations
|
|
* @fasync_list: Asynchronous wake up list
|
|
* @file: File back pointer for gc
|
|
* @sk: internal networking protocol agnostic socket representation
|
|
* @wait: wait queue for several uses
|
|
*/
|
|
struct socket {
|
|
socket_state state;
|
|
|
|
kmemcheck_bitfield_begin(type);
|
|
short type;
|
|
kmemcheck_bitfield_end(type);
|
|
|
|
unsigned long flags;
|
|
|
|
struct socket_wq *wq;
|
|
|
|
struct file *file;
|
|
struct sock *sk;
|
|
const struct proto_ops *ops;
|
|
};
|
|
|
|
/* Forward declarations for types used only via pointers in struct proto_ops. */
struct vm_area_struct;
struct page;
struct kiocb;
struct sockaddr;
struct msghdr;
struct module;
|
|
|
|
struct proto_ops {
|
|
int family;
|
|
struct module *owner;
|
|
int (*release) (struct socket *sock);
|
|
int (*bind) (struct socket *sock,
|
|
struct sockaddr *myaddr,
|
|
int sockaddr_len);
|
|
int (*connect) (struct socket *sock,
|
|
struct sockaddr *vaddr,
|
|
int sockaddr_len, int flags);
|
|
int (*socketpair)(struct socket *sock1,
|
|
struct socket *sock2);
|
|
int (*accept) (struct socket *sock,
|
|
struct socket *newsock, int flags);
|
|
int (*getname) (struct socket *sock,
|
|
struct sockaddr *addr,
|
|
int *sockaddr_len, int peer);
|
|
unsigned int (*poll) (struct file *file, struct socket *sock,
|
|
struct poll_table_struct *wait);
|
|
int (*ioctl) (struct socket *sock, unsigned int cmd,
|
|
unsigned long arg);
|
|
#ifdef CONFIG_COMPAT
|
|
int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
|
|
unsigned long arg);
|
|
#endif
|
|
int (*listen) (struct socket *sock, int len);
|
|
int (*shutdown) (struct socket *sock, int flags);
|
|
int (*setsockopt)(struct socket *sock, int level,
|
|
int optname, char __user *optval, unsigned int optlen);
|
|
int (*getsockopt)(struct socket *sock, int level,
|
|
int optname, char __user *optval, int __user *optlen);
|
|
#ifdef CONFIG_COMPAT
|
|
int (*compat_setsockopt)(struct socket *sock, int level,
|
|
int optname, char __user *optval, unsigned int optlen);
|
|
int (*compat_getsockopt)(struct socket *sock, int level,
|
|
int optname, char __user *optval, int __user *optlen);
|
|
#endif
|
|
int (*sendmsg) (struct kiocb *iocb, struct socket *sock,
|
|
struct msghdr *m, size_t total_len);
|
|
int (*recvmsg) (struct kiocb *iocb, struct socket *sock,
|
|
struct msghdr *m, size_t total_len,
|
|
int flags);
|
|
int (*mmap) (struct file *file, struct socket *sock,
|
|
struct vm_area_struct * vma);
|
|
ssize_t (*sendpage) (struct socket *sock, struct page *page,
|
|
int offset, size_t size, int flags);
|
|
ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
|
|
struct pipe_inode_info *pipe, size_t len, unsigned int flags);
|
|
};
|
|
|
|
/*
 * Declare a variable @dst of (pointer) type @type, initialised with @src
 * cast to @type.  sizeof(*dst) is handed to __sockaddr_check_size() first.
 * @src is parenthesised so that an expression argument is cast as a whole.
 */
#define DECLARE_SOCKADDR(type, dst, src)	\
	type dst = ({ __sockaddr_check_size(sizeof(*dst)); (type) (src); })
|
|
|
|
/*
 * Description of one protocol family, as passed to sock_register():
 * the family number, the callback used to create a socket of that family,
 * and the owning module.
 */
struct net_proto_family {
	int		family;
	int		(*create)(struct net *net, struct socket *sock,
				  int protocol, int kern);
	struct module	*owner;
};
|
|
|
|
/* Forward declarations for the prototypes below. */
struct iovec;
struct kvec;
|
|
|
|
/*
 * NOTE(review): presumably the values for the "how" argument of
 * sock_wake_async() - confirm against its definition.
 */
enum {
	SOCK_WAKE_IO,
	SOCK_WAKE_WAITD,
	SOCK_WAKE_SPACE,
	SOCK_WAKE_URG,
};
|
|
|
|
/* Core socket-layer entry points; the definitions live outside this header. */
extern int	     sock_wake_async(struct socket *sk, int how, int band);
extern int	     sock_register(const struct net_proto_family *fam);
extern void	     sock_unregister(int family);
extern int	     sock_create(int family, int type, int proto,
				 struct socket **res);
extern int	     sock_create_kern(int family, int type, int proto,
				      struct socket **res);
extern int	     sock_create_lite(int family, int type, int proto,
				      struct socket **res);
extern void	     sock_release(struct socket *sock);
extern int	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len);
extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags);
extern int	     sock_map_fd(struct socket *sock, int flags);
extern struct socket *sockfd_lookup(int fd, int *err);
|
|
/* Calls fput() on the socket's file; the argument is parenthesised so any pointer expression works. */
#define sockfd_put(sock) fput((sock)->file)
|
|
extern int	     net_ratelimit(void);

/* Thin wrappers around random32() / srandom32(). */
#define net_random()		random32()
/* The seed is parenthesised so the cast applies to the whole argument expression. */
#define net_srandom(seed)	srandom32((__force u32)(seed))
|
|
|
|
/*
 * kernel_*() socket helpers: each takes the struct socket to operate on
 * as its first argument.  The definitions live outside this header.
 */
extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
			  struct kvec *vec, size_t num, size_t len);
extern int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
			  struct kvec *vec, size_t num,
			  size_t len, int flags);

extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
		       int addrlen);
extern int kernel_listen(struct socket *sock, int backlog);
extern int kernel_accept(struct socket *sock, struct socket **newsock,
			 int flags);
extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
			  int addrlen, int flags);
extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
			      int *addrlen);
extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
			      int *addrlen);
extern int kernel_getsockopt(struct socket *sock, int level, int optname,
			     char *optval, int *optlen);
extern int kernel_setsockopt(struct socket *sock, int level, int optname,
			     char *optval, unsigned int optlen);
extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
			   size_t size, int flags);
extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
extern int kernel_sock_shutdown(struct socket *sock,
				enum sock_shutdown_cmd how);
|
|
|
|
/*
 * Module alias helpers.  They build the strings
 *   "net-pf-<proto>"
 *   "net-pf-<pf>-proto-<proto>"
 *   "net-pf-<pf>-proto-<proto>-type-<type>"
 * by stringifying their arguments, and pass the result to MODULE_ALIAS().
 */
#define MODULE_ALIAS_NETPROTO(proto) \
	MODULE_ALIAS("net-pf-" __stringify(proto))

#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \
	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto))

#define MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, type) \
	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
		     "-type-" __stringify(type))
|
|
|
|
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/ratelimit.h>
/*
 * Rate-limit state, declared only when CONFIG_SYSCTL is set.
 * NOTE(review): presumably the state behind net_ratelimit() - confirm.
 */
extern struct ratelimit_state net_ratelimit_state;
#endif
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _LINUX_NET_H */
|