3.5.5 stable queue and bugfixes
This commit is contained in:
parent
5e2cf58050
commit
e1089320be
4108
3.5-git-stable.patch
4108
3.5-git-stable.patch
File diff suppressed because it is too large
Load Diff
|
@ -1,114 +0,0 @@
|
|||
Subject: [PATCH] af_netlink: force credentials passing [CVE-2012-3520]
|
||||
From: Eric Dumazet <eric.dumazet@gmail.com>
|
||||
To: David Miller <davem@davemloft.net>
|
||||
Cc: netdev <netdev@vger.kernel.org>, Petr Matousek <pmatouse@redhat.com>,
|
||||
Florian Weimer <fweimer@redhat.com>,
|
||||
Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Date: Tue, 21 Aug 2012 18:21:17 +0200
|
||||
Message-ID: <1345566077.5158.530.camel@edumazet-glaptop>
|
||||
Mime-Version: 1.0
|
||||
Content-Transfer-Encoding: 7bit
|
||||
Sender: netdev-owner@vger.kernel.org
|
||||
Precedence: bulk
|
||||
List-ID: <netdev.vger.kernel.org>
|
||||
X-Mailing-List: netdev@vger.kernel.org
|
||||
X-RedHat-Spam-Score: -6.999 (BAYES_00,DKIM_ADSP_CUSTOM_MED,DKIM_SIGNED,FREEMAIL_FROM,RCVD_IN_DNSWL_HI,RP_MATCHES_RCVD,T_DKIM_INVALID)
|
||||
X-Scanned-By: MIMEDefang 2.68 on 10.5.11.22
|
||||
X-Scanned-By: MIMEDefang 2.68 on 10.5.110.16
|
||||
Status: RO
|
||||
Content-Length: 3042
|
||||
Lines: 91
|
||||
|
||||
From: Eric Dumazet <edumazet@google.com>
|
||||
|
||||
Pablo Neira Ayuso discovered that avahi and
|
||||
potentially NetworkManager accept spoofed Netlink messages because of a
|
||||
kernel bug. The kernel passes all-zero SCM_CREDENTIALS ancillary data
|
||||
to the receiver if the sender did not provide such data, instead of not
|
||||
including any such data at all or including the correct data from the
|
||||
peer (as is the case with AF_UNIX).
|
||||
|
||||
This bug was introduced in commit 16e572626961
|
||||
(af_unix: dont send SCM_CREDENTIALS by default)
|
||||
|
||||
This patch forces passing credentials for netlink, as
|
||||
before the regression.
|
||||
|
||||
Another fix would be to not add SCM_CREDENTIALS in
|
||||
netlink messages if not provided by the sender, but it
|
||||
might break some programs.
|
||||
|
||||
With help from Florian Weimer & Petr Matousek
|
||||
|
||||
This issue is designated as CVE-2012-3520
|
||||
|
||||
Signed-off-by: Eric Dumazet <edumazet@google.com>
|
||||
Cc: Petr Matousek <pmatouse@redhat.com>
|
||||
Cc: Florian Weimer <fweimer@redhat.com>
|
||||
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
---
|
||||
include/net/scm.h | 4 +++-
|
||||
net/netlink/af_netlink.c | 2 +-
|
||||
net/unix/af_unix.c | 4 ++--
|
||||
3 files changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/include/net/scm.h b/include/net/scm.h
|
||||
index 079d788..7dc0854 100644
|
||||
--- a/include/net/scm.h
|
||||
+++ b/include/net/scm.h
|
||||
@@ -70,9 +70,11 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
|
||||
}
|
||||
|
||||
static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
|
||||
- struct scm_cookie *scm)
|
||||
+ struct scm_cookie *scm, bool forcecreds)
|
||||
{
|
||||
memset(scm, 0, sizeof(*scm));
|
||||
+ if (forcecreds)
|
||||
+ scm_set_cred(scm, task_tgid(current), current_cred());
|
||||
unix_get_peersec_dgram(sock, scm);
|
||||
if (msg->msg_controllen <= 0)
|
||||
return 0;
|
||||
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
|
||||
index 5463969..1445d73 100644
|
||||
--- a/net/netlink/af_netlink.c
|
||||
+++ b/net/netlink/af_netlink.c
|
||||
@@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
|
||||
if (NULL == siocb->scm)
|
||||
siocb->scm = &scm;
|
||||
|
||||
- err = scm_send(sock, msg, siocb->scm);
|
||||
+ err = scm_send(sock, msg, siocb->scm, true);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
|
||||
index e4768c1..c5ee4ff 100644
|
||||
--- a/net/unix/af_unix.c
|
||||
+++ b/net/unix/af_unix.c
|
||||
@@ -1450,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
|
||||
if (NULL == siocb->scm)
|
||||
siocb->scm = &tmp_scm;
|
||||
wait_for_unix_gc();
|
||||
- err = scm_send(sock, msg, siocb->scm);
|
||||
+ err = scm_send(sock, msg, siocb->scm, false);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
@@ -1619,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
|
||||
if (NULL == siocb->scm)
|
||||
siocb->scm = &tmp_scm;
|
||||
wait_for_unix_gc();
|
||||
- err = scm_send(sock, msg, siocb->scm);
|
||||
+ err = scm_send(sock, msg, siocb->scm, false);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
|
||||
|
||||
--
|
||||
To unsubscribe from this list: send the line "unsubscribe netdev" in
|
||||
the body of a message to majordomo@vger.kernel.org
|
||||
More majordomo info at http://vger.kernel.org/majordomo-info.html
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
From: Christian König <deathsimple@vodafone.de>
|
||||
|
||||
Only increase the higher 32bits if we really detect a wrap around.
|
||||
|
||||
v2: instead of increasing the higher 32bits just use the higher
|
||||
32bits from the last emitted fence.
|
||||
v3: also use last emitted fence value as upper limit.
|
||||
|
||||
The intention of this patch is to make fences as robust as
|
||||
they were before introducing 64bit fences. This is
|
||||
necessary because on older systems it looks like the fence
|
||||
value gets corrupted on initialization.
|
||||
|
||||
Fixes:
|
||||
https://bugs.freedesktop.org/show_bug.cgi?id=51344
|
||||
|
||||
Should also fix:
|
||||
https://bugs.freedesktop.org/show_bug.cgi?id=54129
|
||||
https://bugs.freedesktop.org/show_bug.cgi?id=54662
|
||||
https://bugzilla.redhat.com/show_bug.cgi?id=846505
|
||||
https://bugzilla.redhat.com/show_bug.cgi?id=845639
|
||||
|
||||
This is the 3.5 stable version of the patch:
|
||||
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=f492c171a38d77fc13a8998a0721f2da50835224
|
||||
3.4 and previous kernels do not need to be patched.
|
||||
|
||||
Signed-off-by: Christian König <deathsimple@vodafone.de>
|
||||
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
|
||||
---
|
||||
drivers/gpu/drm/radeon/radeon_fence.c | 8 +++++---
|
||||
1 files changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
|
||||
index 11f5f40..71298ce 100644
|
||||
--- a/drivers/gpu/drm/radeon/radeon_fence.c
|
||||
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
|
||||
@@ -75,7 +75,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
|
||||
|
||||
void radeon_fence_process(struct radeon_device *rdev, int ring)
|
||||
{
|
||||
- uint64_t seq, last_seq;
|
||||
+ uint64_t seq, last_seq, last_emitted;
|
||||
unsigned count_loop = 0;
|
||||
bool wake = false;
|
||||
|
||||
@@ -102,13 +102,15 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
|
||||
*/
|
||||
last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
|
||||
do {
|
||||
+ last_emitted = rdev->fence_drv[ring].seq;
|
||||
seq = radeon_fence_read(rdev, ring);
|
||||
seq |= last_seq & 0xffffffff00000000LL;
|
||||
if (seq < last_seq) {
|
||||
- seq += 0x100000000LL;
|
||||
+ seq &= 0xffffffff;
|
||||
+ seq |= last_emitted & 0xffffffff00000000LL;
|
||||
}
|
||||
|
||||
- if (seq == last_seq) {
|
||||
+ if (seq <= last_seq || seq > last_emitted) {
|
||||
break;
|
||||
}
|
||||
/* If we loop over we don't want to return without
|
||||
--
|
||||
1.7.7.5
|
||||
|
||||
--
|
||||
To unsubscribe from this list: send the line "unsubscribe stable" in
|
||||
the body of a message to majordomo@vger.kernel.org
|
||||
More majordomo info at http://vger.kernel.org/majordomo-info.html
|
33
kernel.spec
33
kernel.spec
|
@ -755,14 +755,16 @@ Patch22060: CPU-hotplug-cpusets-suspend-Dont-modify-cpusets-during.patch
|
|||
#rhbz 820039 843554
|
||||
Patch22061: rds-set-correct-msg_namelen.patch
|
||||
|
||||
#rhbz 845558 844714
|
||||
Patch22070: net-Allow-driver-to-limit-number-of-GSO-segments-per-skb.patch
|
||||
Patch22071: sfc-Fix-maximum-number-of-TSO-segments-and-minimum-TX-queue-size.patch
|
||||
Patch22072: tcp-Apply-device-TSO-segment-limit-earlier.patch
|
||||
|
||||
Patch23000: fbcon-fix-race-condition-between-console-lock-and-cursor-timer.patch
|
||||
|
||||
Patch24000: af_netlink-credentials-cve-2012-3520.patch
|
||||
#rhbz 850350
|
||||
Patch24050: xen-pciback-restore-pci-config-space-after-FLR.patch
|
||||
|
||||
#rhbz 846505 845639
|
||||
Patch24055: drm-radeon-make-64bit-fences-more-robust.patch
|
||||
|
||||
#3.5.5 stable queue
|
||||
Patch25000: linux-3.5.5-stable-queue.patch
|
||||
|
||||
# END OF PATCH DEFINITIONS
|
||||
|
||||
|
@ -1459,14 +1461,16 @@ ApplyPatch CPU-hotplug-cpusets-suspend-Dont-modify-cpusets-during.patch
|
|||
#rhbz 820039 843554
|
||||
ApplyPatch rds-set-correct-msg_namelen.patch
|
||||
|
||||
#rhbz 845558 844714
|
||||
ApplyPatch net-Allow-driver-to-limit-number-of-GSO-segments-per-skb.patch
|
||||
ApplyPatch sfc-Fix-maximum-number-of-TSO-segments-and-minimum-TX-queue-size.patch
|
||||
ApplyPatch tcp-Apply-device-TSO-segment-limit-earlier.patch
|
||||
|
||||
ApplyPatch fbcon-fix-race-condition-between-console-lock-and-cursor-timer.patch
|
||||
|
||||
ApplyPatch af_netlink-credentials-cve-2012-3520.patch
|
||||
#rhbz 850350
|
||||
ApplyPatch xen-pciback-restore-pci-config-space-after-FLR.patch
|
||||
|
||||
#rhbz 846505 845639
|
||||
ApplyPatch drm-radeon-make-64bit-fences-more-robust.patch
|
||||
|
||||
# 3.5.5 stable queue
|
||||
ApplyPatch linux-3.5.5-stable-queue.patch
|
||||
|
||||
# END OF PATCH APPLICATIONS
|
||||
|
||||
|
@ -2329,6 +2333,11 @@ fi
|
|||
# '-' | |
|
||||
# '-'
|
||||
%changelog
|
||||
* Wed Sep 26 2012 Justin M. Forbes <jforbes@redhat.com> 3.5.4-2
|
||||
- xen Restore the PCI config space after an FLR (rhbz 850350)
|
||||
- drm/radeon make 64bit fences more robust (rhbz 846505 845639)
|
||||
- Apply current 3.5.5 stable queue
|
||||
|
||||
* Fri Sep 21 2012 Josh Boyer <jwboyer@redhat.com> 3.4.11-2
|
||||
- Add patch to fix radeon regression from Jerome Glisse (rhbz 785375)
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,70 +0,0 @@
|
|||
From 30b678d844af3305cda5953467005cebb5d7b687 Mon Sep 17 00:00:00 2001
|
||||
From: Ben Hutchings <bhutchings@solarflare.com>
|
||||
Date: Mon, 30 Jul 2012 15:57:00 +0000
|
||||
Subject: [PATCH] net: Allow driver to limit number of GSO segments per skb
|
||||
|
||||
A peer (or local user) may cause TCP to use a nominal MSS of as little
|
||||
as 88 (actual MSS of 76 with timestamps). Given that we have a
|
||||
sufficiently prodigious local sender and the peer ACKs quickly enough,
|
||||
it is nevertheless possible to grow the window for such a connection
|
||||
to the point that we will try to send just under 64K at once. This
|
||||
results in a single skb that expands to 861 segments.
|
||||
|
||||
In some drivers with TSO support, such an skb will require hundreds of
|
||||
DMA descriptors; a substantial fraction of a TX ring or even more than
|
||||
a full ring. The TX queue selected for the skb may stall and trigger
|
||||
the TX watchdog repeatedly (since the problem skb will be retried
|
||||
after the TX reset). This particularly affects sfc, for which the
|
||||
issue is designated as CVE-2012-3412.
|
||||
|
||||
Therefore:
|
||||
1. Add the field net_device::gso_max_segs holding the device-specific
|
||||
limit.
|
||||
2. In netif_skb_features(), if the number of segments is too high then
|
||||
mask out GSO features to force fall back to software GSO.
|
||||
|
||||
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
include/linux/netdevice.h | 2 ++
|
||||
net/core/dev.c | 4 ++++
|
||||
2 files changed, 6 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
|
||||
index eb06e58..a9db4f3 100644
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -1300,6 +1300,8 @@ struct net_device {
|
||||
/* for setting kernel sock attribute on TCP connection setup */
|
||||
#define GSO_MAX_SIZE 65536
|
||||
unsigned int gso_max_size;
|
||||
+#define GSO_MAX_SEGS 65535
|
||||
+ u16 gso_max_segs;
|
||||
|
||||
#ifdef CONFIG_DCB
|
||||
/* Data Center Bridging netlink ops */
|
||||
diff --git a/net/core/dev.c b/net/core/dev.c
|
||||
index 0cb3fe8..f91abf8 100644
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -2134,6 +2134,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
|
||||
__be16 protocol = skb->protocol;
|
||||
netdev_features_t features = skb->dev->features;
|
||||
|
||||
+ if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
|
||||
+ features &= ~NETIF_F_GSO_MASK;
|
||||
+
|
||||
if (protocol == htons(ETH_P_8021Q)) {
|
||||
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
|
||||
protocol = veh->h_vlan_encapsulated_proto;
|
||||
@@ -5986,6 +5989,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
|
||||
dev_net_set(dev, &init_net);
|
||||
|
||||
dev->gso_max_size = GSO_MAX_SIZE;
|
||||
+ dev->gso_max_segs = GSO_MAX_SEGS;
|
||||
|
||||
INIT_LIST_HEAD(&dev->napi_list);
|
||||
INIT_LIST_HEAD(&dev->unreg_list);
|
||||
--
|
||||
1.7.7.6
|
||||
|
|
@ -1,156 +0,0 @@
|
|||
From 7e6d06f0de3f74ca929441add094518ae332257c Mon Sep 17 00:00:00 2001
|
||||
From: Ben Hutchings <bhutchings@solarflare.com>
|
||||
Date: Mon, 30 Jul 2012 15:57:44 +0000
|
||||
Subject: [PATCH] sfc: Fix maximum number of TSO segments and minimum TX queue
|
||||
size
|
||||
|
||||
Currently an skb requiring TSO may not fit within a minimum-size TX
|
||||
queue. The TX queue selected for the skb may stall and trigger the TX
|
||||
watchdog repeatedly (since the problem skb will be retried after the
|
||||
TX reset). This issue is designated as CVE-2012-3412.
|
||||
|
||||
Set the maximum number of TSO segments for our devices to 100. This
|
||||
should make no difference to behaviour unless the actual MSS is less
|
||||
than about 700. Increase the minimum TX queue size accordingly to
|
||||
allow for 2 worst-case skbs, so that there will definitely be space
|
||||
to add an skb after we wake a queue.
|
||||
|
||||
To avoid invalidating existing configurations, change
|
||||
efx_ethtool_set_ringparam() to fix up values that are too small rather
|
||||
than returning -EINVAL.
|
||||
|
||||
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
drivers/net/ethernet/sfc/efx.c | 6 ++++++
|
||||
drivers/net/ethernet/sfc/efx.h | 14 ++++++++++----
|
||||
drivers/net/ethernet/sfc/ethtool.c | 16 +++++++++++-----
|
||||
drivers/net/ethernet/sfc/tx.c | 19 +++++++++++++++++++
|
||||
4 files changed, 46 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
|
||||
index 70554a1..65a8d49 100644
|
||||
--- a/drivers/net/ethernet/sfc/efx.c
|
||||
+++ b/drivers/net/ethernet/sfc/efx.c
|
||||
@@ -1503,6 +1503,11 @@ static int efx_probe_all(struct efx_nic *efx)
|
||||
goto fail2;
|
||||
}
|
||||
|
||||
+ BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
|
||||
+ if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
|
||||
+ rc = -EINVAL;
|
||||
+ goto fail3;
|
||||
+ }
|
||||
efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;
|
||||
|
||||
rc = efx_probe_filters(efx);
|
||||
@@ -2070,6 +2075,7 @@ static int efx_register_netdev(struct efx_nic *efx)
|
||||
net_dev->irq = efx->pci_dev->irq;
|
||||
net_dev->netdev_ops = &efx_netdev_ops;
|
||||
SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);
|
||||
+ net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
|
||||
|
||||
rtnl_lock();
|
||||
|
||||
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
|
||||
index be8f915..70755c9 100644
|
||||
--- a/drivers/net/ethernet/sfc/efx.h
|
||||
+++ b/drivers/net/ethernet/sfc/efx.h
|
||||
@@ -30,6 +30,7 @@ extern netdev_tx_t
|
||||
efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
|
||||
extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
|
||||
extern int efx_setup_tc(struct net_device *net_dev, u8 num_tc);
|
||||
+extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
|
||||
|
||||
/* RX */
|
||||
extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
|
||||
@@ -52,10 +53,15 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
|
||||
#define EFX_MAX_EVQ_SIZE 16384UL
|
||||
#define EFX_MIN_EVQ_SIZE 512UL
|
||||
|
||||
-/* The smallest [rt]xq_entries that the driver supports. Callers of
|
||||
- * efx_wake_queue() assume that they can subsequently send at least one
|
||||
- * skb. Falcon/A1 may require up to three descriptors per skb_frag. */
|
||||
-#define EFX_MIN_RING_SIZE (roundup_pow_of_two(2 * 3 * MAX_SKB_FRAGS))
|
||||
+/* Maximum number of TCP segments we support for soft-TSO */
|
||||
+#define EFX_TSO_MAX_SEGS 100
|
||||
+
|
||||
+/* The smallest [rt]xq_entries that the driver supports. RX minimum
|
||||
+ * is a bit arbitrary. For TX, we must have space for at least 2
|
||||
+ * TSO skbs.
|
||||
+ */
|
||||
+#define EFX_RXQ_MIN_ENT 128U
|
||||
+#define EFX_TXQ_MIN_ENT(efx) (2 * efx_tx_max_skb_descs(efx))
|
||||
|
||||
/* Filters */
|
||||
extern int efx_probe_filters(struct efx_nic *efx);
|
||||
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
|
||||
index 10536f9..8cba2df 100644
|
||||
--- a/drivers/net/ethernet/sfc/ethtool.c
|
||||
+++ b/drivers/net/ethernet/sfc/ethtool.c
|
||||
@@ -680,21 +680,27 @@ static int efx_ethtool_set_ringparam(struct net_device *net_dev,
|
||||
struct ethtool_ringparam *ring)
|
||||
{
|
||||
struct efx_nic *efx = netdev_priv(net_dev);
|
||||
+ u32 txq_entries;
|
||||
|
||||
if (ring->rx_mini_pending || ring->rx_jumbo_pending ||
|
||||
ring->rx_pending > EFX_MAX_DMAQ_SIZE ||
|
||||
ring->tx_pending > EFX_MAX_DMAQ_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
- if (ring->rx_pending < EFX_MIN_RING_SIZE ||
|
||||
- ring->tx_pending < EFX_MIN_RING_SIZE) {
|
||||
+ if (ring->rx_pending < EFX_RXQ_MIN_ENT) {
|
||||
netif_err(efx, drv, efx->net_dev,
|
||||
- "TX and RX queues cannot be smaller than %ld\n",
|
||||
- EFX_MIN_RING_SIZE);
|
||||
+ "RX queues cannot be smaller than %u\n",
|
||||
+ EFX_RXQ_MIN_ENT);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- return efx_realloc_channels(efx, ring->rx_pending, ring->tx_pending);
|
||||
+ txq_entries = max(ring->tx_pending, EFX_TXQ_MIN_ENT(efx));
|
||||
+ if (txq_entries != ring->tx_pending)
|
||||
+ netif_warn(efx, drv, efx->net_dev,
|
||||
+ "increasing TX queue size to minimum of %u\n",
|
||||
+ txq_entries);
|
||||
+
|
||||
+ return efx_realloc_channels(efx, ring->rx_pending, txq_entries);
|
||||
}
|
||||
|
||||
static int efx_ethtool_set_pauseparam(struct net_device *net_dev,
|
||||
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
|
||||
index 9b225a7..1871343 100644
|
||||
--- a/drivers/net/ethernet/sfc/tx.c
|
||||
+++ b/drivers/net/ethernet/sfc/tx.c
|
||||
@@ -119,6 +119,25 @@ efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
|
||||
return len;
|
||||
}
|
||||
|
||||
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
|
||||
+{
|
||||
+ /* Header and payload descriptor for each output segment, plus
|
||||
+ * one for every input fragment boundary within a segment
|
||||
+ */
|
||||
+ unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
|
||||
+
|
||||
+ /* Possibly one more per segment for the alignment workaround */
|
||||
+ if (EFX_WORKAROUND_5391(efx))
|
||||
+ max_descs += EFX_TSO_MAX_SEGS;
|
||||
+
|
||||
+ /* Possibly more for PCIe page boundaries within input fragments */
|
||||
+ if (PAGE_SIZE > EFX_PAGE_SIZE)
|
||||
+ max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
|
||||
+ DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));
|
||||
+
|
||||
+ return max_descs;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Add a socket buffer to a TX queue
|
||||
*
|
||||
--
|
||||
1.7.7.6
|
||||
|
|
@ -1,137 +0,0 @@
|
|||
From 1485348d2424e1131ea42efc033cbd9366462b01 Mon Sep 17 00:00:00 2001
|
||||
From: Ben Hutchings <bhutchings@solarflare.com>
|
||||
Date: Mon, 30 Jul 2012 16:11:42 +0000
|
||||
Subject: [PATCH] tcp: Apply device TSO segment limit earlier
|
||||
|
||||
Cache the device gso_max_segs in sock::sk_gso_max_segs and use it to
|
||||
limit the size of TSO skbs. This avoids the need to fall back to
|
||||
software GSO for local TCP senders.
|
||||
|
||||
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
include/net/sock.h | 2 ++
|
||||
net/core/sock.c | 1 +
|
||||
net/ipv4/tcp.c | 4 +++-
|
||||
net/ipv4/tcp_cong.c | 3 ++-
|
||||
net/ipv4/tcp_output.c | 21 ++++++++++++---------
|
||||
5 files changed, 20 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/include/net/sock.h b/include/net/sock.h
|
||||
index b373023..72132ae 100644
|
||||
--- a/include/net/sock.h
|
||||
+++ b/include/net/sock.h
|
||||
@@ -218,6 +218,7 @@ struct cg_proto;
|
||||
* @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
|
||||
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
|
||||
* @sk_gso_max_size: Maximum GSO segment size to build
|
||||
+ * @sk_gso_max_segs: Maximum number of GSO segments
|
||||
* @sk_lingertime: %SO_LINGER l_linger setting
|
||||
* @sk_backlog: always used with the per-socket spinlock held
|
||||
* @sk_callback_lock: used with the callbacks in the end of this struct
|
||||
@@ -338,6 +339,7 @@ struct sock {
|
||||
netdev_features_t sk_route_nocaps;
|
||||
int sk_gso_type;
|
||||
unsigned int sk_gso_max_size;
|
||||
+ u16 sk_gso_max_segs;
|
||||
int sk_rcvlowat;
|
||||
unsigned long sk_lingertime;
|
||||
struct sk_buff_head sk_error_queue;
|
||||
diff --git a/net/core/sock.c b/net/core/sock.c
|
||||
index 6b654b3..8f67ced 100644
|
||||
--- a/net/core/sock.c
|
||||
+++ b/net/core/sock.c
|
||||
@@ -1458,6 +1458,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
|
||||
} else {
|
||||
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
|
||||
sk->sk_gso_max_size = dst->dev->gso_max_size;
|
||||
+ sk->sk_gso_max_segs = dst->dev->gso_max_segs;
|
||||
}
|
||||
}
|
||||
}
|
||||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
|
||||
index e7e6eea..2109ff4 100644
|
||||
--- a/net/ipv4/tcp.c
|
||||
+++ b/net/ipv4/tcp.c
|
||||
@@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
|
||||
old_size_goal + mss_now > xmit_size_goal)) {
|
||||
xmit_size_goal = old_size_goal;
|
||||
} else {
|
||||
- tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
|
||||
+ tp->xmit_size_goal_segs =
|
||||
+ min_t(u16, xmit_size_goal / mss_now,
|
||||
+ sk->sk_gso_max_segs);
|
||||
xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
|
||||
}
|
||||
}
|
||||
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
|
||||
index 4d4db16..1432cdb 100644
|
||||
--- a/net/ipv4/tcp_cong.c
|
||||
+++ b/net/ipv4/tcp_cong.c
|
||||
@@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
|
||||
left = tp->snd_cwnd - in_flight;
|
||||
if (sk_can_gso(sk) &&
|
||||
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
|
||||
- left * tp->mss_cache < sk->sk_gso_max_size)
|
||||
+ left * tp->mss_cache < sk->sk_gso_max_size &&
|
||||
+ left < sk->sk_gso_max_segs)
|
||||
return true;
|
||||
return left <= tcp_max_tso_deferred_mss(tp);
|
||||
}
|
||||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
|
||||
index 3f1bcff..a7b3ec9 100644
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -1522,21 +1522,21 @@ static void tcp_cwnd_validate(struct sock *sk)
|
||||
* when we would be allowed to send the split-due-to-Nagle skb fully.
|
||||
*/
|
||||
static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
|
||||
- unsigned int mss_now, unsigned int cwnd)
|
||||
+ unsigned int mss_now, unsigned int max_segs)
|
||||
{
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
- u32 needed, window, cwnd_len;
|
||||
+ u32 needed, window, max_len;
|
||||
|
||||
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
|
||||
- cwnd_len = mss_now * cwnd;
|
||||
+ max_len = mss_now * max_segs;
|
||||
|
||||
- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
|
||||
- return cwnd_len;
|
||||
+ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
|
||||
+ return max_len;
|
||||
|
||||
needed = min(skb->len, window);
|
||||
|
||||
- if (cwnd_len <= needed)
|
||||
- return cwnd_len;
|
||||
+ if (max_len <= needed)
|
||||
+ return max_len;
|
||||
|
||||
return needed - needed % mss_now;
|
||||
}
|
||||
@@ -1765,7 +1765,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
|
||||
limit = min(send_win, cong_win);
|
||||
|
||||
/* If a full-sized TSO skb can be sent, do it. */
|
||||
- if (limit >= sk->sk_gso_max_size)
|
||||
+ if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
|
||||
+ sk->sk_gso_max_segs * tp->mss_cache))
|
||||
goto send_now;
|
||||
|
||||
/* Middle in queue won't get any more data, full sendable already? */
|
||||
@@ -1999,7 +2000,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||
limit = mss_now;
|
||||
if (tso_segs > 1 && !tcp_urg_mode(tp))
|
||||
limit = tcp_mss_split_point(sk, skb, mss_now,
|
||||
- cwnd_quota);
|
||||
+ min_t(unsigned int,
|
||||
+ cwnd_quota,
|
||||
+ sk->sk_gso_max_segs));
|
||||
|
||||
if (skb->len > limit &&
|
||||
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
|
||||
--
|
||||
1.7.7.6
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
When we do an FLR, or D0->D3_hot we may lose the BARs as the
|
||||
device has turned itself off (and on). This means the device cannot
|
||||
function unless the pci_restore_state is called - which it is
|
||||
when the PCI device is unbound from the Xen PCI backend driver.
|
||||
For PV guests it ends up calling pci_enable_device / pci_enable_msi[x]
|
||||
which does the proper steps.
|
||||
|
||||
That, however, does not happen when an HVM guest is run, as QEMU
|
||||
deals with PCI configuration space. QEMU also requires that the
|
||||
device be "parked" under the ownership of a pci-stub driver to
|
||||
guarantee that the PCI device is not being used. Hence we
|
||||
follow the same incantation as pci_reset_function does - by
|
||||
doing an FLR, then restoring the PCI configuration space.
|
||||
|
||||
The result of this patch is that when you run lspci, you get
|
||||
the following now:
|
||||
|
||||
- Region 0: [virtual] Memory at fe8c0000 (32-bit, non-prefetchable) [size=128K]
|
||||
- Region 1: [virtual] Memory at fe800000 (32-bit, non-prefetchable) [size=512K]
|
||||
+ Region 0: Memory at fe8c0000 (32-bit, non-prefetchable) [size=128K]
|
||||
+ Region 1: Memory at fe800000 (32-bit, non-prefetchable) [size=512K]
|
||||
Region 2: I/O ports at c000 [size=32]
|
||||
- Region 3: [virtual] Memory at fe8e0000 (32-bit, non-prefetchable) [size=16K]
|
||||
+ Region 3: Memory at fe8e0000 (32-bit, non-prefetchable) [size=16K]
|
||||
|
||||
The [virtual] means that lspci read those entries from SysFS but when
|
||||
it read them from the device it got a different value (0xffffffff).
|
||||
|
||||
CC: stable@vger.kernel.org # only for v3.4 and v3.5
|
||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
---
|
||||
drivers/xen/xen-pciback/pci_stub.c | 1 +
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
|
||||
index acec6fa..e5a0c13 100644
|
||||
--- a/drivers/xen/xen-pciback/pci_stub.c
|
||||
+++ b/drivers/xen/xen-pciback/pci_stub.c
|
||||
@@ -362,6 +362,7 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
|
||||
else {
|
||||
dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n");
|
||||
__pci_reset_function_locked(dev);
|
||||
+ pci_restore_state(dev);
|
||||
}
|
||||
/* Now disable the device (this also ensures some private device
|
||||
* data is setup before we export)
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
--
|
||||
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
|
||||
the body of a message to majordomo@vger.kernel.org
|
||||
More majordomo info at http://vger.kernel.org/majordomo-info.html
|
||||
Please read the FAQ at http://www.tux.org/lkml/
|
Loading…
Reference in New Issue