From 0c1508129adc051fabaf8debefea79baa2f1a81b Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 24 May 2012 19:46:26 +0530
Subject: [PATCH] CPU hotplug, cpusets, suspend: Don't modify cpusets during
 suspend/resume

In the event of CPU hotplug, the kernel modifies the cpusets' cpus_allowed
masks as and when necessary to ensure that the tasks belonging to the cpusets
have some place (online CPUs) to run on. And regular CPU hotplug is
destructive in the sense that the kernel doesn't remember the original cpuset
configurations set by the user across hotplug operations.

However, suspend/resume (which uses CPU hotplug) is a special case in which
the kernel has the responsibility to restore the system (during resume) to
exactly the same state it was in before suspend.

In order to achieve that, do the following:

1. Don't modify cpusets during suspend/resume. At all.
   In particular, don't move the tasks from one cpuset to another, and
   don't modify any cpuset's cpus_allowed mask. So, simply ignore cpusets
   during the CPU hotplug operations that are carried out in the
   suspend/resume path.

2. However, cpusets and sched domains are related. We just want to avoid
   altering cpusets alone. So, to keep the sched domains updated, build
   a single sched domain (containing all active cpus) during each of the
   CPU hotplug operations carried out in the s/r path, effectively ignoring
   the cpusets' cpus_allowed masks.

   (Since userspace is frozen while doing all this, it will go unnoticed.)

3. During the last CPU online operation during resume, build the sched
   domains by looking up the (unaltered) cpusets' cpus_allowed masks.
   That will bring the system back to the same state it was in before
   suspend.

Ultimately, this will not only solve the cpuset problem related to
suspend/resume (i.e., restore the cpusets to exactly what they were before
suspend, by not touching them at all) but also speed up suspend/resume,
because we avoid running the cpuset update code for every CPU being
offlined/onlined.
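
For illustration only: below is a minimal, stand-alone user-space C sketch
(not kernel code; the helper names and printed messages are invented for
this example) of the num_cpus_frozen bookkeeping described above. Each
frozen offline during suspend bumps the counter, each frozen online during
resume drops it, and only the last online falls back to rebuilding the
sched domains from the cpusets:

	#include <stdio.h>

	static int num_cpus_frozen;

	/* Stands in for the CPU_DOWN_PREPARE_FROZEN case: count the CPU
	 * and pretend to build one single sched domain. */
	static void frozen_cpu_down(int cpu)
	{
		num_cpus_frozen++;
		printf("cpu%d down (frozen): single sched domain\n", cpu);
	}

	/* Stands in for the CPU_ONLINE_FROZEN case: only the last online
	 * operation rebuilds the sched domains from the cpusets. */
	static void frozen_cpu_up(int cpu)
	{
		num_cpus_frozen--;
		if (num_cpus_frozen)
			printf("cpu%d up (frozen): single sched domain\n", cpu);
		else
			printf("cpu%d up (frozen): rebuild domains from cpusets\n", cpu);
	}

	int main(void)
	{
		int cpu;

		for (cpu = 1; cpu <= 3; cpu++)	/* suspend: offline cpu1..cpu3 */
			frozen_cpu_down(cpu);
		for (cpu = 1; cpu <= 3; cpu++)	/* resume: online cpu1..cpu3 */
			frozen_cpu_up(cpu);
		return 0;
	}

Running it, every operation prints the "single sched domain" message except
the final online, which is where step 3 above restores the cpuset-defined
sched domains.
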
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20120524141611.3692.20155.stgit@srivatsabhat.in.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/cpuset.c     |  3 +++
 kernel/sched/core.c | 40 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 4 deletions(-)

--- linux-3.4.6-3.1.fc17.noarch.orig/kernel/cpuset.c
+++ linux-3.4.6-3.1.fc17.noarch/kernel/cpuset.c
@@ -2065,6 +2065,9 @@ static void scan_for_empty_cpusets(struc
  * (of no affect) on systems that are actively using CPU hotplug
  * but making no active use of cpusets.
  *
+ * The only exception to this is suspend/resume, where we don't
+ * modify cpusets at all.
+ *
  * This routine ensures that top_cpuset.cpus_allowed tracks
  * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
--- linux-3.4.6-3.1.fc17.noarch.orig/kernel/sched/core.c
+++ linux-3.4.6-3.1.fc17.noarch/kernel/sched/core.c
@@ -6931,34 +6931,66 @@ int __init sched_create_sysfs_power_savi
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
+static int num_cpus_frozen;	/* used to mark begin/end of suspend/resume */
+
 /*
  * Update cpusets according to cpu_active mask. If cpusets are
  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
  * around partition_sched_domains().
+ *
+ * If we come here as part of a suspend/resume, don't touch cpusets because we
+ * want to restore it back to its original state upon resume anyway.
  */
 static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 			     void *hcpu)
 {
-	switch (action & ~CPU_TASKS_FROZEN) {
+	switch (action) {
+	case CPU_ONLINE_FROZEN:
+	case CPU_DOWN_FAILED_FROZEN:
+
+		/*
+		 * num_cpus_frozen tracks how many CPUs are involved in suspend
+		 * resume sequence. As long as this is not the last online
+		 * operation in the resume sequence, just build a single sched
+		 * domain, ignoring cpusets.
+		 */
+		num_cpus_frozen--;
+		if (likely(num_cpus_frozen)) {
+			partition_sched_domains(1, NULL, NULL);
+			break;
+		}
+
+		/*
+		 * This is the last CPU online operation. So fall through and
+		 * restore the original sched domains by considering the
+		 * cpuset configurations.
+		 */
+
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
 		cpuset_update_active_cpus();
-		return NOTIFY_OK;
+		break;
 	default:
 		return NOTIFY_DONE;
 	}
+	return NOTIFY_OK;
 }
 
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 			       void *hcpu)
 {
-	switch (action & ~CPU_TASKS_FROZEN) {
+	switch (action) {
 	case CPU_DOWN_PREPARE:
 		cpuset_update_active_cpus();
-		return NOTIFY_OK;
+		break;
+	case CPU_DOWN_PREPARE_FROZEN:
+		num_cpus_frozen++;
+		partition_sched_domains(1, NULL, NULL);
+		break;
 	default:
 		return NOTIFY_DONE;
 	}
+	return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)