Fix for TPROXY panic (rhbz 1370061)
Fix for known OOM regression
This commit is contained in:
parent
49699bdee7
commit
4e55c73613
|
@ -0,0 +1,121 @@
|
|||
From a7f80308bac4013728e33e2bcb9b60eee78f60fb Mon Sep 17 00:00:00 2001
|
||||
From: Michal Hocko <mhocko@kernel.org>
|
||||
Date: Mon, 22 Aug 2016 11:32:49 +0200
|
||||
Subject: [PATCH] OOM detection regressions since 4.7
|
||||
|
||||
Hi,
|
||||
there have been multiple reports [1][2][3][4][5] about premature OOM
|
||||
killer invocations since 4.7 which contains oom detection rework. All of
|
||||
them were for order-2 (kernel stack) allocation requests failing because
|
||||
of a high fragmentation and compaction failing to make any forward
|
||||
progress. While investigating this we have found out that the compaction
|
||||
just gives up too early. Vlastimil has been working on compaction
|
||||
improvement for quite some time and his series [6] is already sitting
|
||||
in mmotm tree. This already helps a lot because it drops some heuristics
|
||||
which are more aimed at lower latencies for high orders rather than
|
||||
reliability. Joonsoo has then identified further problem with too many
|
||||
blocks being marked as unmovable [7] and Vlastimil has prepared a patch
|
||||
on top of his series [8] which is also in the mmotm tree now.
|
||||
|
||||
That being said, the regression is real and should be fixed for 4.7
|
||||
stable users. [6][8] was reported to help and ooms are no longer
|
||||
reproducible. I know we are quite late (rc3) in 4.8 but I would vote
|
||||
for merging those patches and have them in 4.8. For 4.7 I would go
|
||||
with a partial revert of the detection rework for high order requests
|
||||
(see patch below). This patch is really trivial. If those compaction
|
||||
improvements are just too large for 4.8 then we can use the same patch
|
||||
as for 4.7 stable for now and revert it in 4.9 after compaction changes
|
||||
are merged.
|
||||
|
||||
Thoughts?
|
||||
|
||||
[1] http://lkml.kernel.org/r/20160731051121.GB307@x4
|
||||
[2] http://lkml.kernel.org/r/201608120901.41463.a.miskiewicz@gmail.com
|
||||
[3] http://lkml.kernel.org/r/20160801192620.GD31957@dhcp22.suse.cz
|
||||
[4] https://lists.opensuse.org/opensuse-kernel/2016-08/msg00021.html
|
||||
[5] https://bugzilla.opensuse.org/show_bug.cgi?id=994066
|
||||
[6] http://lkml.kernel.org/r/20160810091226.6709-1-vbabka@suse.cz
|
||||
[7] http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE
|
||||
[8] http://lkml.kernel.org/r/f7a9ea9d-bb88-bfd6-e340-3a933559305a@suse.cz
|
||||
---
|
||||
mm/page_alloc.c | 50 ++------------------------------------------------
|
||||
1 file changed, 2 insertions(+), 48 deletions(-)
|
||||
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index 8b3e134..6e35419 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -3254,53 +3254,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-static inline bool
|
||||
-should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
|
||||
- enum compact_result compact_result, enum migrate_mode *migrate_mode,
|
||||
- int compaction_retries)
|
||||
-{
|
||||
- int max_retries = MAX_COMPACT_RETRIES;
|
||||
-
|
||||
- if (!order)
|
||||
- return false;
|
||||
-
|
||||
- /*
|
||||
- * compaction considers all the zone as desperately out of memory
|
||||
- * so it doesn't really make much sense to retry except when the
|
||||
- * failure could be caused by weak migration mode.
|
||||
- */
|
||||
- if (compaction_failed(compact_result)) {
|
||||
- if (*migrate_mode == MIGRATE_ASYNC) {
|
||||
- *migrate_mode = MIGRATE_SYNC_LIGHT;
|
||||
- return true;
|
||||
- }
|
||||
- return false;
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * make sure the compaction wasn't deferred or didn't bail out early
|
||||
- * due to locks contention before we declare that we should give up.
|
||||
- * But do not retry if the given zonelist is not suitable for
|
||||
- * compaction.
|
||||
- */
|
||||
- if (compaction_withdrawn(compact_result))
|
||||
- return compaction_zonelist_suitable(ac, order, alloc_flags);
|
||||
-
|
||||
- /*
|
||||
- * !costly requests are much more important than __GFP_REPEAT
|
||||
- * costly ones because they are de facto nofail and invoke OOM
|
||||
- * killer to move on while costly can fail and users are ready
|
||||
- * to cope with that. 1/4 retries is rather arbitrary but we
|
||||
- * would need much more detailed feedback from compaction to
|
||||
- * make a better decision.
|
||||
- */
|
||||
- if (order > PAGE_ALLOC_COSTLY_ORDER)
|
||||
- max_retries /= 4;
|
||||
- if (compaction_retries <= max_retries)
|
||||
- return true;
|
||||
-
|
||||
- return false;
|
||||
-}
|
||||
#else
|
||||
static inline struct page *
|
||||
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
@@ -3311,6 +3264,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+#endif /* CONFIG_COMPACTION */
|
||||
+
|
||||
static inline bool
|
||||
should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
|
||||
enum compact_result compact_result,
|
||||
@@ -3337,7 +3292,6 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
|
||||
}
|
||||
return false;
|
||||
}
|
||||
-#endif /* CONFIG_COMPACTION */
|
||||
|
||||
/* Perform direct synchronous page reclaim */
|
||||
static int
|
||||
--
|
||||
2.7.4
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
From patchwork Wed Aug 17 16:04:31 2016
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
MIME-Version: 1.0
|
||||
Content-Transfer-Encoding: 7bit
|
||||
Subject: kernel panic TPROXY , vanilla 4.7.1
|
||||
From: Eric Dumazet <eric.dumazet@gmail.com>
|
||||
X-Patchwork-Id: 660174
|
||||
X-Patchwork-Delegate: davem@davemloft.net
|
||||
Message-Id: <1471449871.29842.3.camel@edumazet-glaptop3.roam.corp.google.com>
|
||||
To: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
|
||||
Cc: Linux Kernel Network Developers <netdev@vger.kernel.org>,
|
||||
netfilter-devel@vger.kernel.org
|
||||
Date: Wed, 17 Aug 2016 09:04:31 -0700
|
||||
|
||||
On Wed, 2016-08-17 at 08:42 -0700, Eric Dumazet wrote:
|
||||
> On Wed, 2016-08-17 at 17:31 +0300, Denys Fedoryshchenko wrote:
|
||||
> > Hi!
|
||||
> >
|
||||
> > Tried to run squid on latest kernel, and hit a panic
|
||||
> > Sometimes it just shows warning in dmesg (but doesnt work properly)
|
||||
> > [ 75.701666] IPv4: Attempt to release TCP socket in state 10
|
||||
> > ffff88102d430780
|
||||
> > [ 83.866974] squid (2700) used greatest stack depth: 12912 bytes left
|
||||
> > [ 87.506644] IPv4: Attempt to release TCP socket in state 10
|
||||
> > ffff880078a48780
|
||||
> > [ 114.704295] IPv4: Attempt to release TCP socket in state 10
|
||||
> > ffff881029f8ad00
|
||||
> >
|
||||
> > I cannot catch yet oops/panic message, netconsole not working.
|
||||
> >
|
||||
> > After triggering warning message 3 times, i am unable to run squid
|
||||
> > anymore (without reboot), and in netstat it doesnt show port running.
|
||||
> >
|
||||
> > firewall is:
|
||||
> > *mangle
|
||||
> > -A PREROUTING -p tcp -m socket -j DIVERT
|
||||
> > -A PREROUTING -p tcp -m tcp --dport 80 -i eno1 -j TPROXY --on-port 3129
|
||||
> > --on-ip 0.0.0.0 --tproxy-mark 0x1/0x1
|
||||
> > -A DIVERT -j MARK --set-xmark 0x1/0xffffffff
|
||||
> > -A DIVERT -j ACCEPT
|
||||
> >
|
||||
> > routing
|
||||
> > ip rule add fwmark 1 lookup 100
|
||||
> > ip route add local default dev eno1 table 100
|
||||
> >
|
||||
> >
|
||||
> > squid config is default with tproxy option
|
||||
> > http_port 3129 tproxy
|
||||
> >
|
||||
>
|
||||
> Hmppff... sorry for this, I will send a fix.
|
||||
>
|
||||
> Thanks for the report !
|
||||
>
|
||||
|
||||
|
||||
Could you try the following ?
|
||||
|
||||
Thanks !
|
||||
|
||||
net/netfilter/xt_TPROXY.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
|
||||
index 7f4414d26a66..663c4c3c9072 100644
|
||||
--- a/net/netfilter/xt_TPROXY.c
|
||||
+++ b/net/netfilter/xt_TPROXY.c
|
||||
@@ -127,6 +127,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
|
||||
daddr, dport,
|
||||
in->ifindex);
|
||||
|
||||
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
|
||||
+ sk = NULL;
|
||||
/* NOTE: we return listeners even if bound to
|
||||
* 0.0.0.0, those are filtered out in
|
||||
* xt_socket, since xt_TPROXY needs 0 bound
|
||||
@@ -195,6 +197,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
|
||||
daddr, ntohs(dport),
|
||||
in->ifindex);
|
||||
|
||||
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
|
||||
+ sk = NULL;
|
||||
/* NOTE: we return listeners even if bound to
|
||||
* 0.0.0.0, those are filtered out in
|
||||
* xt_socket, since xt_TPROXY needs 0 bound
|
10
kernel.spec
10
kernel.spec
|
@ -635,6 +635,12 @@ Patch855: aacraid-Check-size-values-after-double-fetch-from-us.patch
|
|||
#rhbz 1365940
|
||||
Patch856: 0001-udp-fix-poll-issue-with-zero-sized-packets.patch
|
||||
|
||||
#rhbz 1370061
|
||||
Patch857: kernel-panic-TPROXY-vanilla-4.7.1.patch
|
||||
|
||||
# lkml.kernel.org/r/<20160822093249.GA14916@dhcp22.suse.cz>
|
||||
Patch858: 0001-OOM-detection-regressions-since-4.7.patch
|
||||
|
||||
# END OF PATCH DEFINITIONS
|
||||
|
||||
%endif
|
||||
|
@ -2162,6 +2168,10 @@ fi
|
|||
#
|
||||
#
|
||||
%changelog
|
||||
* Thu Aug 25 2016 Laura Abbott <labbott@fedoraproject.org>
|
||||
- Fix for TPROXY panic (rhbz 1370061)
|
||||
- Fix for known OOM regression
|
||||
|
||||
* Tue Aug 23 2016 Laura Abbott <labbott@fedoraproject.org>
|
||||
- Fix for inability to send zero sized UDP packets (rhbz 1365940)
|
||||
|
||||
|
|
Loading…
Reference in New Issue