0c19f846d5
Tuntap and similar devices can inject GSO packets. Accept type VIRTIO_NET_HDR_GSO_UDP, even though not generating UFO natively. Processes are expected to use feature negotiation such as TUNSETOFFLOAD to detect supported offload types and refrain from injecting other packets. This process breaks down with live migration: guest kernels do not renegotiate flags, so destination hosts need to expose all features that the source host does. Partially revert the UFO removal from 182e0b6b5846~1..d9d30adf5677. This patch introduces nearly(*) no new code to simplify verification. It brings back verbatim tuntap UFO negotiation, VIRTIO_NET_HDR_GSO_UDP insertion and software UFO segmentation. It does not reinstate protocol stack support, hardware offload (NETIF_F_UFO), SKB_GSO_UDP tunneling in SKB_GSO_SOFTWARE or reception of VIRTIO_NET_HDR_GSO_UDP packets in tuntap. To support SKB_GSO_UDP reappearing in the stack, also reinstate logic in act_csum and openvswitch. Achieve equivalence with v4.13 HEAD by squashing in commit939912216f
("net: skb_needs_check() removes CHECKSUM_UNNECESSARY check for tx.") and reverting commit8d63bee643
("net: avoid skb_warn_bad_offload false positives on UFO"). (*) To avoid having to bring back skb_shinfo(skb)->ip6_frag_id, ipv6_proxy_select_ident is changed to return a __be32 and this is assigned directly to the frag_hdr. Also, SKB_GSO_UDP is inserted at the end of the enum to minimize code churn. Tested Booted a v4.13 guest kernel with QEMU. On a host kernel before this patch `ethtool -k eth0` shows UFO disabled. After the patch, it is enabled, same as on a v4.13 host kernel. A UFO packet sent from the guest appears on the tap device: host: nc -l -p -u 8000 & tcpdump -n -i tap0 guest: dd if=/dev/zero of=payload.txt bs=1 count=2000 nc -u 192.16.1.1 8000 < payload.txt Direct tap to tap transmission of VIRTIO_NET_HDR_GSO_UDP succeeds, packets arriving fragmented: ./with_tap_pair.sh ./tap_send_ufo tap0 tap1 (from https://github.com/wdebruij/kerneltools/tree/master/tests) Changes v1 -> v2 - simplified set_offload change (review comment) - documented test procedure Link: http://lkml.kernel.org/r/<CAF=yD-LuUeDuL9YWPJD9ykOZ0QCjNeznPDr6whqZ9NGMNF12Mw@mail.gmail.com> Fixes:fb652fdfe8
("macvlan/macvtap: Remove NETIF_F_UFO advertisement.") Reported-by: Michal Kubecek <mkubecek@suse.cz> Signed-off-by: Willem de Bruijn <willemb@google.com> Acked-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
181 lines
4.3 KiB
C
181 lines
4.3 KiB
C
/*
|
|
* IPv6 library code, needed by static components when full IPv6 support is
|
|
* not configured or static. These functions are needed by GSO/GRO implementation.
|
|
*/
|
|
#include <linux/export.h>
|
|
#include <net/ip.h>
|
|
#include <net/ipv6.h>
|
|
#include <net/ip6_fib.h>
|
|
#include <net/addrconf.h>
|
|
#include <net/secure_seq.h>
|
|
#include <linux/netfilter.h>
|
|
|
|
static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
|
|
const struct in6_addr *dst,
|
|
const struct in6_addr *src)
|
|
{
|
|
u32 hash, id;
|
|
|
|
hash = __ipv6_addr_jhash(dst, hashrnd);
|
|
hash = __ipv6_addr_jhash(src, hash);
|
|
hash ^= net_hash_mix(net);
|
|
|
|
/* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
|
|
* set the hight order instead thus minimizing possible future
|
|
* collisions.
|
|
*/
|
|
id = ip_idents_reserve(hash, 1);
|
|
if (unlikely(!id))
|
|
id = 1 << 31;
|
|
|
|
return id;
|
|
}
|
|
|
|
/* This function exists only for tap drivers that must support broken
|
|
* clients requesting UFO without specifying an IPv6 fragment ID.
|
|
*
|
|
* This is similar to ipv6_select_ident() but we use an independent hash
|
|
* seed to limit information leakage.
|
|
*
|
|
* The network header must be set before calling this.
|
|
*/
|
|
__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
|
|
{
|
|
static u32 ip6_proxy_idents_hashrnd __read_mostly;
|
|
struct in6_addr buf[2];
|
|
struct in6_addr *addrs;
|
|
u32 id;
|
|
|
|
addrs = skb_header_pointer(skb,
|
|
skb_network_offset(skb) +
|
|
offsetof(struct ipv6hdr, saddr),
|
|
sizeof(buf), buf);
|
|
if (!addrs)
|
|
return 0;
|
|
|
|
net_get_random_once(&ip6_proxy_idents_hashrnd,
|
|
sizeof(ip6_proxy_idents_hashrnd));
|
|
|
|
id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
|
|
&addrs[1], &addrs[0]);
|
|
return htonl(id);
|
|
}
|
|
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
|
|
|
|
__be32 ipv6_select_ident(struct net *net,
|
|
const struct in6_addr *daddr,
|
|
const struct in6_addr *saddr)
|
|
{
|
|
static u32 ip6_idents_hashrnd __read_mostly;
|
|
u32 id;
|
|
|
|
net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
|
|
|
|
id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
|
|
return htonl(id);
|
|
}
|
|
EXPORT_SYMBOL(ipv6_select_ident);
|
|
|
|
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
|
|
{
|
|
unsigned int offset = sizeof(struct ipv6hdr);
|
|
unsigned int packet_len = skb_tail_pointer(skb) -
|
|
skb_network_header(skb);
|
|
int found_rhdr = 0;
|
|
*nexthdr = &ipv6_hdr(skb)->nexthdr;
|
|
|
|
while (offset <= packet_len) {
|
|
struct ipv6_opt_hdr *exthdr;
|
|
|
|
switch (**nexthdr) {
|
|
|
|
case NEXTHDR_HOP:
|
|
break;
|
|
case NEXTHDR_ROUTING:
|
|
found_rhdr = 1;
|
|
break;
|
|
case NEXTHDR_DEST:
|
|
#if IS_ENABLED(CONFIG_IPV6_MIP6)
|
|
if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
|
|
break;
|
|
#endif
|
|
if (found_rhdr)
|
|
return offset;
|
|
break;
|
|
default:
|
|
return offset;
|
|
}
|
|
|
|
if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
|
|
return -EINVAL;
|
|
|
|
exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
|
|
offset);
|
|
offset += ipv6_optlen(exthdr);
|
|
if (offset > IPV6_MAXPLEN)
|
|
return -EINVAL;
|
|
*nexthdr = &exthdr->nexthdr;
|
|
}
|
|
|
|
return -EINVAL;
|
|
}
|
|
EXPORT_SYMBOL(ip6_find_1stfragopt);
|
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
int ip6_dst_hoplimit(struct dst_entry *dst)
|
|
{
|
|
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
|
|
if (hoplimit == 0) {
|
|
struct net_device *dev = dst->dev;
|
|
struct inet6_dev *idev;
|
|
|
|
rcu_read_lock();
|
|
idev = __in6_dev_get(dev);
|
|
if (idev)
|
|
hoplimit = idev->cnf.hop_limit;
|
|
else
|
|
hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
|
|
rcu_read_unlock();
|
|
}
|
|
return hoplimit;
|
|
}
|
|
EXPORT_SYMBOL(ip6_dst_hoplimit);
|
|
#endif
|
|
|
|
int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
int len;
|
|
|
|
len = skb->len - sizeof(struct ipv6hdr);
|
|
if (len > IPV6_MAXPLEN)
|
|
len = 0;
|
|
ipv6_hdr(skb)->payload_len = htons(len);
|
|
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
|
|
|
|
/* if egress device is enslaved to an L3 master device pass the
|
|
* skb to its handler for processing
|
|
*/
|
|
skb = l3mdev_ip6_out(sk, skb);
|
|
if (unlikely(!skb))
|
|
return 0;
|
|
|
|
skb->protocol = htons(ETH_P_IPV6);
|
|
|
|
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
|
|
net, sk, skb, NULL, skb_dst(skb)->dev,
|
|
dst_output);
|
|
}
|
|
EXPORT_SYMBOL_GPL(__ip6_local_out);
|
|
|
|
int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
int err;
|
|
|
|
err = __ip6_local_out(net, sk, skb);
|
|
if (likely(err == 1))
|
|
err = dst_output(net, sk, skb);
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL_GPL(ip6_local_out);
|