c6894dec8e
After promisc mode management was introduced a bridge device could do
dev_set_promiscuity from its ndo_change_rx_flags() callback which in
turn can be called after the bridge's addr_list_lock has been taken
(e.g. by dev_uc_add). This causes a false positive lockdep splat because
the port interfaces' addr_list_lock is taken when br_manage_promisc()
runs after the bridge's addr list lock was already taken.
To remove the false positive introduce a custom bridge addr_list_lock
class and set it on bridge init.
A simple way to reproduce this is with the following:
$ brctl addbr br0
$ ip l add l br0 br0.100 type vlan id 100
$ ip l set br0 up
$ ip l set br0.100 up
$ echo 1 > /sys/class/net/br0/bridge/vlan_filtering
$ brctl addif br0 eth0
Splat:
[ 43.684325] =============================================
[ 43.684485] [ INFO: possible recursive locking detected ]
[ 43.684636] 4.4.0-rc8+ #54 Not tainted
[ 43.684755] ---------------------------------------------
[ 43.684906] brctl/1187 is trying to acquire lock:
[ 43.685047] (_xmit_ETHER){+.....}, at: [<ffffffff8150169e>] dev_set_rx_mode+0x1e/0x40
[ 43.685460] but task is already holding lock:
[ 43.685618] (_xmit_ETHER){+.....}, at: [<ffffffff815072a7>] dev_uc_add+0x27/0x80
[ 43.686015] other info that might help us debug this:
[ 43.686316] Possible unsafe locking scenario:
[ 43.686743] CPU0
[ 43.686967] ----
[ 43.687197] lock(_xmit_ETHER);
[ 43.687544] lock(_xmit_ETHER);
[ 43.687886] *** DEADLOCK ***
[ 43.688438] May be due to missing lock nesting notation
[ 43.688882] 2 locks held by brctl/1187:
[ 43.689134] #0: (rtnl_mutex){+.+.+.}, at: [<ffffffff81510317>] rtnl_lock+0x17/0x20
[ 43.689852] #1: (_xmit_ETHER){+.....}, at: [<ffffffff815072a7>] dev_uc_add+0x27/0x80
[ 43.690575] stack backtrace:
[ 43.690970] CPU: 0 PID: 1187 Comm: brctl Not tainted 4.4.0-rc8+ #54
[ 43.691270] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
[ 43.691770] ffffffff826a25c0 ffff8800369fb8e0 ffffffff81360ceb ffffffff826a25c0
[ 43.692425] ffff8800369fb9b8 ffffffff810d0466 ffff8800369fb968 ffffffff81537139
[ 43.693071] ffff88003a08c880 0000000000000000 00000000ffffffff 0000000002080020
[ 43.693709] Call Trace:
[ 43.693931] [<ffffffff81360ceb>] dump_stack+0x4b/0x70
[ 43.694199] [<ffffffff810d0466>] __lock_acquire+0x1e46/0x1e90
[ 43.694483] [<ffffffff81537139>] ? netlink_broadcast_filtered+0x139/0x3e0
[ 43.694789] [<ffffffff8153b5da>] ? nlmsg_notify+0x5a/0xc0
[ 43.695064] [<ffffffff810d10f5>] lock_acquire+0xe5/0x1f0
[ 43.695340] [<ffffffff8150169e>] ? dev_set_rx_mode+0x1e/0x40
[ 43.695623] [<ffffffff815edea5>] _raw_spin_lock_bh+0x45/0x80
[ 43.695901] [<ffffffff8150169e>] ? dev_set_rx_mode+0x1e/0x40
[ 43.696180] [<ffffffff8150169e>] dev_set_rx_mode+0x1e/0x40
[ 43.696460] [<ffffffff8150189c>] dev_set_promiscuity+0x3c/0x50
[ 43.696750] [<ffffffffa0586845>] br_port_set_promisc+0x25/0x50 [bridge]
[ 43.697052] [<ffffffffa05869aa>] br_manage_promisc+0x8a/0xe0 [bridge]
[ 43.697348] [<ffffffffa05826ee>] br_dev_change_rx_flags+0x1e/0x20 [bridge]
[ 43.697655] [<ffffffff81501532>] __dev_set_promiscuity+0x132/0x1f0
[ 43.697943] [<ffffffff81501672>] __dev_set_rx_mode+0x82/0x90
[ 43.698223] [<ffffffff815072de>] dev_uc_add+0x5e/0x80
[ 43.698498] [<ffffffffa05b3c62>] vlan_device_event+0x542/0x650 [8021q]
[ 43.698798] [<ffffffff8109886d>] notifier_call_chain+0x5d/0x80
[ 43.699083] [<ffffffff810988b6>] raw_notifier_call_chain+0x16/0x20
[ 43.699374] [<ffffffff814f456e>] call_netdevice_notifiers_info+0x6e/0x80
[ 43.699678] [<ffffffff814f4596>] call_netdevice_notifiers+0x16/0x20
[ 43.699973] [<ffffffffa05872be>] br_add_if+0x47e/0x4c0 [bridge]
[ 43.700259] [<ffffffffa058801e>] add_del_if+0x6e/0x80 [bridge]
[ 43.700548] [<ffffffffa0588b5f>] br_dev_ioctl+0xaf/0xc0 [bridge]
[ 43.700836] [<ffffffff8151a7ac>] dev_ifsioc+0x30c/0x3c0
[ 43.701106] [<ffffffff8151aac9>] dev_ioctl+0xf9/0x6f0
[ 43.701379] [<ffffffff81254345>] ? mntput_no_expire+0x5/0x450
[ 43.701665] [<ffffffff812543ee>] ? mntput_no_expire+0xae/0x450
[ 43.701947] [<ffffffff814d7b02>] sock_do_ioctl+0x42/0x50
[ 43.702219] [<ffffffff814d8175>] sock_ioctl+0x1e5/0x290
[ 43.702500] [<ffffffff81242d0b>] do_vfs_ioctl+0x2cb/0x5c0
[ 43.702771] [<ffffffff81243079>] SyS_ioctl+0x79/0x90
[ 43.703033] [<ffffffff815eebb6>] entry_SYSCALL_64_fastpath+0x16/0x7a
CC: Vlad Yasevich <vyasevic@redhat.com>
CC: Stephen Hemminger <stephen@networkplumber.org>
CC: Bridge list <bridge@lists.linux-foundation.org>
CC: Andy Gospodarek <gospo@cumulusnetworks.com>
CC: Roopa Prabhu <roopa@cumulusnetworks.com>
Fixes: 2796d0c648
("bridge: Automatically manage port promiscuous mode.")
Reported-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
408 lines
9.5 KiB
C
408 lines
9.5 KiB
C
/*
|
|
* Device handling code
|
|
* Linux ethernet bridge
|
|
*
|
|
* Authors:
|
|
* Lennert Buytenhek <buytenh@gnu.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/netpoll.h>
|
|
#include <linux/etherdevice.h>
|
|
#include <linux/ethtool.h>
|
|
#include <linux/list.h>
|
|
#include <linux/netfilter_bridge.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
#include "br_private.h"
|
|
|
|
#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
|
|
NETIF_F_GSO_MASK | NETIF_F_HW_CSUM)
|
|
|
|
const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
|
|
EXPORT_SYMBOL_GPL(nf_br_ops);
|
|
|
|
static struct lock_class_key bridge_netdev_addr_lock_key;
|
|
|
|
/* net device transmit always called with BH disabled */
|
|
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
const unsigned char *dest = skb->data;
|
|
struct net_bridge_fdb_entry *dst;
|
|
struct net_bridge_mdb_entry *mdst;
|
|
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
|
|
const struct nf_br_ops *nf_ops;
|
|
u16 vid = 0;
|
|
|
|
rcu_read_lock();
|
|
nf_ops = rcu_dereference(nf_br_ops);
|
|
if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) {
|
|
rcu_read_unlock();
|
|
return NETDEV_TX_OK;
|
|
}
|
|
|
|
u64_stats_update_begin(&brstats->syncp);
|
|
brstats->tx_packets++;
|
|
brstats->tx_bytes += skb->len;
|
|
u64_stats_update_end(&brstats->syncp);
|
|
|
|
BR_INPUT_SKB_CB(skb)->brdev = dev;
|
|
|
|
skb_reset_mac_header(skb);
|
|
skb_pull(skb, ETH_HLEN);
|
|
|
|
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
|
|
goto out;
|
|
|
|
if (is_broadcast_ether_addr(dest))
|
|
br_flood_deliver(br, skb, false);
|
|
else if (is_multicast_ether_addr(dest)) {
|
|
if (unlikely(netpoll_tx_running(dev))) {
|
|
br_flood_deliver(br, skb, false);
|
|
goto out;
|
|
}
|
|
if (br_multicast_rcv(br, NULL, skb, vid)) {
|
|
kfree_skb(skb);
|
|
goto out;
|
|
}
|
|
|
|
mdst = br_mdb_get(br, skb, vid);
|
|
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
|
|
br_multicast_querier_exists(br, eth_hdr(skb)))
|
|
br_multicast_deliver(mdst, skb);
|
|
else
|
|
br_flood_deliver(br, skb, false);
|
|
} else if ((dst = __br_fdb_get(br, dest, vid)) != NULL)
|
|
br_deliver(dst->dst, skb);
|
|
else
|
|
br_flood_deliver(br, skb, true);
|
|
|
|
out:
|
|
rcu_read_unlock();
|
|
return NETDEV_TX_OK;
|
|
}
|
|
|
|
static void br_set_lockdep_class(struct net_device *dev)
|
|
{
|
|
lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
|
|
}
|
|
|
|
static int br_dev_init(struct net_device *dev)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
int err;
|
|
|
|
br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
|
|
if (!br->stats)
|
|
return -ENOMEM;
|
|
|
|
err = br_vlan_init(br);
|
|
if (err)
|
|
free_percpu(br->stats);
|
|
br_set_lockdep_class(dev);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int br_dev_open(struct net_device *dev)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
|
|
netdev_update_features(dev);
|
|
netif_start_queue(dev);
|
|
br_stp_enable_bridge(br);
|
|
br_multicast_open(br);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void br_dev_set_multicast_list(struct net_device *dev)
|
|
{
|
|
}
|
|
|
|
static void br_dev_change_rx_flags(struct net_device *dev, int change)
|
|
{
|
|
if (change & IFF_PROMISC)
|
|
br_manage_promisc(netdev_priv(dev));
|
|
}
|
|
|
|
static int br_dev_stop(struct net_device *dev)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
|
|
br_stp_disable_bridge(br);
|
|
br_multicast_stop(br);
|
|
|
|
netif_stop_queue(dev);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
|
|
struct rtnl_link_stats64 *stats)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
struct pcpu_sw_netstats tmp, sum = { 0 };
|
|
unsigned int cpu;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
unsigned int start;
|
|
const struct pcpu_sw_netstats *bstats
|
|
= per_cpu_ptr(br->stats, cpu);
|
|
do {
|
|
start = u64_stats_fetch_begin_irq(&bstats->syncp);
|
|
memcpy(&tmp, bstats, sizeof(tmp));
|
|
} while (u64_stats_fetch_retry_irq(&bstats->syncp, start));
|
|
sum.tx_bytes += tmp.tx_bytes;
|
|
sum.tx_packets += tmp.tx_packets;
|
|
sum.rx_bytes += tmp.rx_bytes;
|
|
sum.rx_packets += tmp.rx_packets;
|
|
}
|
|
|
|
stats->tx_bytes = sum.tx_bytes;
|
|
stats->tx_packets = sum.tx_packets;
|
|
stats->rx_bytes = sum.rx_bytes;
|
|
stats->rx_packets = sum.rx_packets;
|
|
|
|
return stats;
|
|
}
|
|
|
|
static int br_change_mtu(struct net_device *dev, int new_mtu)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
if (new_mtu < 68 || new_mtu > br_min_mtu(br))
|
|
return -EINVAL;
|
|
|
|
dev->mtu = new_mtu;
|
|
|
|
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
|
|
/* remember the MTU in the rtable for PMTU */
|
|
dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Allow setting mac address to any valid ethernet address. */
|
|
static int br_set_mac_address(struct net_device *dev, void *p)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
struct sockaddr *addr = p;
|
|
|
|
if (!is_valid_ether_addr(addr->sa_data))
|
|
return -EADDRNOTAVAIL;
|
|
|
|
spin_lock_bh(&br->lock);
|
|
if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
|
|
/* Mac address will be changed in br_stp_change_bridge_id(). */
|
|
br_stp_change_bridge_id(br, addr->sa_data);
|
|
}
|
|
spin_unlock_bh(&br->lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
|
|
{
|
|
strlcpy(info->driver, "bridge", sizeof(info->driver));
|
|
strlcpy(info->version, BR_VERSION, sizeof(info->version));
|
|
strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
|
|
strlcpy(info->bus_info, "N/A", sizeof(info->bus_info));
|
|
}
|
|
|
|
static netdev_features_t br_fix_features(struct net_device *dev,
|
|
netdev_features_t features)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
|
|
return br_features_recompute(br, features);
|
|
}
|
|
|
|
#ifdef CONFIG_NET_POLL_CONTROLLER
|
|
static void br_poll_controller(struct net_device *br_dev)
|
|
{
|
|
}
|
|
|
|
static void br_netpoll_cleanup(struct net_device *dev)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
struct net_bridge_port *p;
|
|
|
|
list_for_each_entry(p, &br->port_list, list)
|
|
br_netpoll_disable(p);
|
|
}
|
|
|
|
static int __br_netpoll_enable(struct net_bridge_port *p)
|
|
{
|
|
struct netpoll *np;
|
|
int err;
|
|
|
|
np = kzalloc(sizeof(*p->np), GFP_KERNEL);
|
|
if (!np)
|
|
return -ENOMEM;
|
|
|
|
err = __netpoll_setup(np, p->dev);
|
|
if (err) {
|
|
kfree(np);
|
|
return err;
|
|
}
|
|
|
|
p->np = np;
|
|
return err;
|
|
}
|
|
|
|
int br_netpoll_enable(struct net_bridge_port *p)
|
|
{
|
|
if (!p->br->dev->npinfo)
|
|
return 0;
|
|
|
|
return __br_netpoll_enable(p);
|
|
}
|
|
|
|
static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
struct net_bridge_port *p;
|
|
int err = 0;
|
|
|
|
list_for_each_entry(p, &br->port_list, list) {
|
|
if (!p->dev)
|
|
continue;
|
|
err = __br_netpoll_enable(p);
|
|
if (err)
|
|
goto fail;
|
|
}
|
|
|
|
out:
|
|
return err;
|
|
|
|
fail:
|
|
br_netpoll_cleanup(dev);
|
|
goto out;
|
|
}
|
|
|
|
void br_netpoll_disable(struct net_bridge_port *p)
|
|
{
|
|
struct netpoll *np = p->np;
|
|
|
|
if (!np)
|
|
return;
|
|
|
|
p->np = NULL;
|
|
|
|
__netpoll_free_async(np);
|
|
}
|
|
|
|
#endif
|
|
|
|
static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
|
|
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
|
|
return br_add_if(br, slave_dev);
|
|
}
|
|
|
|
static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
|
|
return br_del_if(br, slave_dev);
|
|
}
|
|
|
|
static const struct ethtool_ops br_ethtool_ops = {
|
|
.get_drvinfo = br_getinfo,
|
|
.get_link = ethtool_op_get_link,
|
|
};
|
|
|
|
static const struct net_device_ops br_netdev_ops = {
|
|
.ndo_open = br_dev_open,
|
|
.ndo_stop = br_dev_stop,
|
|
.ndo_init = br_dev_init,
|
|
.ndo_start_xmit = br_dev_xmit,
|
|
.ndo_get_stats64 = br_get_stats64,
|
|
.ndo_set_mac_address = br_set_mac_address,
|
|
.ndo_set_rx_mode = br_dev_set_multicast_list,
|
|
.ndo_change_rx_flags = br_dev_change_rx_flags,
|
|
.ndo_change_mtu = br_change_mtu,
|
|
.ndo_do_ioctl = br_dev_ioctl,
|
|
#ifdef CONFIG_NET_POLL_CONTROLLER
|
|
.ndo_netpoll_setup = br_netpoll_setup,
|
|
.ndo_netpoll_cleanup = br_netpoll_cleanup,
|
|
.ndo_poll_controller = br_poll_controller,
|
|
#endif
|
|
.ndo_add_slave = br_add_slave,
|
|
.ndo_del_slave = br_del_slave,
|
|
.ndo_fix_features = br_fix_features,
|
|
.ndo_fdb_add = br_fdb_add,
|
|
.ndo_fdb_del = br_fdb_delete,
|
|
.ndo_fdb_dump = br_fdb_dump,
|
|
.ndo_bridge_getlink = br_getlink,
|
|
.ndo_bridge_setlink = br_setlink,
|
|
.ndo_bridge_dellink = br_dellink,
|
|
.ndo_features_check = passthru_features_check,
|
|
};
|
|
|
|
static void br_dev_free(struct net_device *dev)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
|
|
free_percpu(br->stats);
|
|
free_netdev(dev);
|
|
}
|
|
|
|
static struct device_type br_type = {
|
|
.name = "bridge",
|
|
};
|
|
|
|
void br_dev_setup(struct net_device *dev)
|
|
{
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
|
|
eth_hw_addr_random(dev);
|
|
ether_setup(dev);
|
|
|
|
dev->netdev_ops = &br_netdev_ops;
|
|
dev->destructor = br_dev_free;
|
|
dev->ethtool_ops = &br_ethtool_ops;
|
|
SET_NETDEV_DEVTYPE(dev, &br_type);
|
|
dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
|
|
|
|
dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
|
|
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
|
|
dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
|
|
NETIF_F_HW_VLAN_STAG_TX;
|
|
dev->vlan_features = COMMON_FEATURES;
|
|
|
|
br->dev = dev;
|
|
spin_lock_init(&br->lock);
|
|
INIT_LIST_HEAD(&br->port_list);
|
|
spin_lock_init(&br->hash_lock);
|
|
|
|
br->bridge_id.prio[0] = 0x80;
|
|
br->bridge_id.prio[1] = 0x00;
|
|
|
|
ether_addr_copy(br->group_addr, eth_reserved_addr_base);
|
|
|
|
br->stp_enabled = BR_NO_STP;
|
|
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
|
|
br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
|
|
|
|
br->designated_root = br->bridge_id;
|
|
br->bridge_max_age = br->max_age = 20 * HZ;
|
|
br->bridge_hello_time = br->hello_time = 2 * HZ;
|
|
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
|
|
br->ageing_time = BR_DEFAULT_AGEING_TIME;
|
|
|
|
br_netfilter_rtable_init(br);
|
|
br_stp_timer_init(br);
|
|
br_multicast_init(br);
|
|
}
|