Fix softlockup (rhbz 1492664 1492665)
This commit is contained in:
parent
f9f0809242
commit
078f4c3ff6
|
@@ -0,0 +1,181 @@
|
|||
From 400e22499dd92613821374c8c6c88c7225359980 Mon Sep 17 00:00:00 2001
|
||||
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
|
||||
Date: Wed, 15 Nov 2017 17:38:37 -0800
|
||||
Subject: [PATCH] mm: don't warn about allocations which stall for too long
|
||||
|
||||
Commit 63f53dea0c98 ("mm: warn about allocations which stall for too
|
||||
long") was a great step toward reducing the possibility of the silent hang-up
|
||||
problem caused by memory allocation stalls. But this commit reverts it,
|
||||
for it is possible to trigger OOM lockup and/or soft lockups when many
|
||||
threads concurrently call warn_alloc() (in order to warn about memory
|
||||
allocation stalls) due to the current implementation of printk(), and it is
|
||||
difficult to obtain useful information due to limitations of the synchronous
|
||||
warning approach.
|
||||
|
||||
The current printk() implementation flushes all pending logs using the
|
||||
context of a thread which called console_unlock(). printk() should be
|
||||
able to flush all pending logs eventually unless somebody continues
|
||||
appending to the printk() buffer.
|
||||
|
||||
Since warn_alloc() started appending to the printk() buffer while waiting
|
||||
for oom_kill_process() to make forward progress when oom_kill_process()
|
||||
is processing pending logs, it became possible for warn_alloc() to force
|
||||
oom_kill_process() to loop inside printk(). As a result, warn_alloc()
|
||||
significantly increased the possibility of preventing oom_kill_process()
|
||||
from making forward progress.
|
||||
|
||||
---------- Pseudo code start ----------
|
||||
Before warn_alloc() was introduced:
|
||||
|
||||
retry:
|
||||
if (mutex_trylock(&oom_lock)) {
|
||||
while (atomic_read(&printk_pending_logs) > 0) {
|
||||
atomic_dec(&printk_pending_logs);
|
||||
print_one_log();
|
||||
}
|
||||
// Send SIGKILL here.
|
||||
mutex_unlock(&oom_lock)
|
||||
}
|
||||
goto retry;
|
||||
|
||||
After warn_alloc() was introduced:
|
||||
|
||||
retry:
|
||||
if (mutex_trylock(&oom_lock)) {
|
||||
while (atomic_read(&printk_pending_logs) > 0) {
|
||||
atomic_dec(&printk_pending_logs);
|
||||
print_one_log();
|
||||
}
|
||||
// Send SIGKILL here.
|
||||
mutex_unlock(&oom_lock)
|
||||
} else if (waited_for_10seconds()) {
|
||||
atomic_inc(&printk_pending_logs);
|
||||
}
|
||||
goto retry;
|
||||
---------- Pseudo code end ----------
|
||||
|
||||
Although waited_for_10seconds() becomes true once per 10 seconds,
|
||||
an unbounded number of threads can call waited_for_10seconds() at the same
|
||||
time. Also, since threads doing waited_for_10seconds() keep doing
|
||||
an almost busy loop, the thread doing print_one_log() can use little CPU
|
||||
resources. Therefore, this situation can be simplified like
|
||||
|
||||
---------- Pseudo code start ----------
|
||||
retry:
|
||||
if (mutex_trylock(&oom_lock)) {
|
||||
while (atomic_read(&printk_pending_logs) > 0) {
|
||||
atomic_dec(&printk_pending_logs);
|
||||
print_one_log();
|
||||
}
|
||||
// Send SIGKILL here.
|
||||
mutex_unlock(&oom_lock)
|
||||
} else {
|
||||
atomic_inc(&printk_pending_logs);
|
||||
}
|
||||
goto retry;
|
||||
---------- Pseudo code end ----------
|
||||
|
||||
when printk() is called faster than print_one_log() can process a log.
|
||||
|
||||
One possible mitigation would be to introduce a new lock in order to
|
||||
make sure that no other series of printk() (either oom_kill_process() or
|
||||
warn_alloc()) can append to the printk() buffer when one series of printk()
|
||||
(either oom_kill_process() or warn_alloc()) is already in progress.
|
||||
|
||||
Such serialization will also help obtain kernel messages in readable
|
||||
form.
|
||||
|
||||
---------- Pseudo code start ----------
|
||||
retry:
|
||||
if (mutex_trylock(&oom_lock)) {
|
||||
mutex_lock(&oom_printk_lock);
|
||||
while (atomic_read(&printk_pending_logs) > 0) {
|
||||
atomic_dec(&printk_pending_logs);
|
||||
print_one_log();
|
||||
}
|
||||
// Send SIGKILL here.
|
||||
mutex_unlock(&oom_printk_lock);
|
||||
mutex_unlock(&oom_lock)
|
||||
} else {
|
||||
if (mutex_trylock(&oom_printk_lock)) {
|
||||
atomic_inc(&printk_pending_logs);
|
||||
mutex_unlock(&oom_printk_lock);
|
||||
}
|
||||
}
|
||||
goto retry;
|
||||
---------- Pseudo code end ----------
|
||||
|
||||
But this commit does not go in that direction, for we don't want to
|
||||
introduce a new lock dependency, and we are unlikely to be able to obtain
|
||||
useful information even if we serialized oom_kill_process() and
|
||||
warn_alloc().
|
||||
|
||||
The synchronous approach is prone to unexpected results (e.g. too late [1],
|
||||
too frequent [2], overlooked [3]). As far as I know, warn_alloc() never
|
||||
helped with providing information other than "something is going wrong".
|
||||
I want to consider an asynchronous approach which can obtain information
|
||||
during stalls with possibly relevant threads (e.g. the owner of
|
||||
oom_lock and kswapd-like threads) and serve as a trigger for actions
|
||||
(e.g. turn on/off tracepoints, ask the libvirt daemon to take a memory dump
|
||||
of stalling KVM guest for diagnostic purpose).
|
||||
|
||||
This commit temporarily loses the ability to report e.g. OOM lockup due to
|
||||
being unable to invoke the OOM killer due to a !__GFP_FS allocation request.
|
||||
But an asynchronous approach will be able to detect such a situation and emit
|
||||
a warning. Thus, let's remove warn_alloc().
|
||||
|
||||
[1] https://bugzilla.kernel.org/show_bug.cgi?id=192981
|
||||
[2] http://lkml.kernel.org/r/CAM_iQpWuPVGc2ky8M-9yukECtS+zKjiDasNymX7rMcBjBFyM_A@mail.gmail.com
|
||||
[3] commit db73ee0d46379922 ("mm, vmscan: do not loop on too_many_isolated for ever")
|
||||
|
||||
Link: http://lkml.kernel.org/r/1509017339-4802-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
|
||||
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
|
||||
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
|
||||
Reported-by: yuwang.yuwang <yuwang.yuwang@alibaba-inc.com>
|
||||
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
|
||||
Acked-by: Michal Hocko <mhocko@suse.com>
|
||||
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
|
||||
Cc: Vlastimil Babka <vbabka@suse.cz>
|
||||
Cc: Mel Gorman <mgorman@suse.de>
|
||||
Cc: Dave Hansen <dave.hansen@intel.com>
|
||||
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
|
||||
Cc: Petr Mladek <pmladek@suse.com>
|
||||
Cc: Steven Rostedt <rostedt@goodmis.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
---
|
||||
mm/page_alloc.c | 10 ----------
|
||||
1 file changed, 10 deletions(-)
|
||||
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index 04bf1ad50144..bd1a686e40fe 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -3903,8 +3903,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
enum compact_result compact_result;
|
||||
int compaction_retries;
|
||||
int no_progress_loops;
|
||||
- unsigned long alloc_start = jiffies;
|
||||
- unsigned int stall_timeout = 10 * HZ;
|
||||
unsigned int cpuset_mems_cookie;
|
||||
int reserve_flags;
|
||||
|
||||
@@ -4036,14 +4034,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
if (!can_direct_reclaim)
|
||||
goto nopage;
|
||||
|
||||
- /* Make sure we know about allocations which stall for too long */
|
||||
- if (time_after(jiffies, alloc_start + stall_timeout)) {
|
||||
- warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
|
||||
- "page allocation stalls for %ums, order:%u",
|
||||
- jiffies_to_msecs(jiffies-alloc_start), order);
|
||||
- stall_timeout += 10 * HZ;
|
||||
- }
|
||||
-
|
||||
/* Avoid recursion of direct reclaim */
|
||||
if (current->flags & PF_MEMALLOC)
|
||||
goto nopage;
|
||||
--
|
||||
2.14.3
|
||||
|
|
@@ -683,6 +683,9 @@ Patch635: Add-support-for-One-by-Wacom-CTL-472-CTL-672.patch
|
|||
# CVE-2018-5750 rhbz 1539706 1539708
|
||||
Patch636: ACPI-sbshc-remove-raw-pointer-from-printk-message.patch
|
||||
|
||||
# rhbz 1492664 1492665
|
||||
Patch637: 0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch
|
||||
|
||||
# END OF PATCH DEFINITIONS
|
||||
|
||||
%endif
|
||||
|
@@ -2242,6 +2245,7 @@ fi
|
|||
%changelog
|
||||
* Mon Jan 29 2018 Justin M. Forbes <jforbes@fedoraproject.org>
|
||||
- Fix CVE-2018-5750 (rhbz 1539706 1539708)
|
||||
- Fix softlockup (rhbz 1492664 1492665)
|
||||
|
||||
* Sat Jan 27 2018 Laura Abbott <labbott@fedoraproject.org>
|
||||
- Add support for Wacom tablet (rhbz 1539238)
|
||||
|
|
Loading…
Reference in New Issue