From a5a0809bc58e133d674e45175b052c9bdf002f1d Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sun, 23 Jul 2017 08:54:25 -0700 Subject: [PATCH 1/8] cpufreq: schedutil: Make iowait boost more energy efficient Currently the iowait_boost feature in schedutil makes the frequency go to max on iowait wakeups. This feature was added to handle a case that Peter described where the throughput of operations involving continuous I/O requests [1] is reduced due to running at a lower frequency, however the lower throughput itself causes utilization to be low and hence causing frequency to be low hence its "stuck". Instead of going to max, its also possible to achieve the same effect by ramping up to max if there are repeated in_iowait wakeups happening. This patch is an attempt to do that. We start from a lower frequency (policy->min) and double the boost for every consecutive iowait update until we reach the maximum iowait boost frequency (iowait_boost_max). I ran a synthetic test (continuous O_DIRECT writes in a loop) on an x86 machine with intel_pstate in passive mode using schedutil. In this test the iowait_boost value ramped from 800MHz to 4GHz in 60ms. The patch achieves the desired improved throughput as the existing behavior. [1] https://patchwork.kernel.org/patch/9735885/ Suggested-by: Peter Zijlstra Suggested-by: Viresh Kumar Signed-off-by: Joel Fernandes Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 38 +++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 29a397067ffa..148844a995a8 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -53,6 +53,7 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; + bool iowait_boost_pending; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -169,30 +170,54 @@ static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags) { if (flags & SCHED_CPUFREQ_IOWAIT) { - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + if (sg_cpu->iowait_boost_pending) + return; + + sg_cpu->iowait_boost_pending = true; + + if (sg_cpu->iowait_boost) { + sg_cpu->iowait_boost <<= 1; + if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) + sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + } else { + sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min; + } } else if (sg_cpu->iowait_boost) { s64 delta_ns = time - sg_cpu->last_update; /* Clear iowait_boost if the CPU apprears to have been idle. */ - if (delta_ns > TICK_NSEC) + if (delta_ns > TICK_NSEC) { sg_cpu->iowait_boost = 0; + sg_cpu->iowait_boost_pending = false; + } } } static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, unsigned long *max) { - unsigned long boost_util = sg_cpu->iowait_boost; - unsigned long boost_max = sg_cpu->iowait_boost_max; + unsigned long boost_util, boost_max; - if (!boost_util) + if (!sg_cpu->iowait_boost) return; + if (sg_cpu->iowait_boost_pending) { + sg_cpu->iowait_boost_pending = false; + } else { + sg_cpu->iowait_boost >>= 1; + if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) { + sg_cpu->iowait_boost = 0; + return; + } + } + + boost_util = sg_cpu->iowait_boost; + boost_max = sg_cpu->iowait_boost_max; + if (*util * boost_max < *max * boost_util) { *util = boost_util; *max = boost_max; } - sg_cpu->iowait_boost >>= 1; } #ifdef CONFIG_NO_HZ_COMMON @@ -264,6 +289,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) delta_ns = time - j_sg_cpu->last_update; if (delta_ns > TICK_NSEC) { j_sg_cpu->iowait_boost = 0; + j_sg_cpu->iowait_boost_pending = false; continue; } if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) From 251accf98591d7f59f7a2bac2e05c66d16bf2811 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sun, 23 Jul 2017 08:54:26 -0700 Subject: [PATCH 2/8] cpufreq: schedutil: Use unsigned int for iowait boost Make iowait_boost and iowait_boost_max as unsigned int since its unit is kHz and this is consistent with struct cpufreq_policy. Also change the local variables in sugov_iowait_boost() to match this. Signed-off-by: Joel Fernandes Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 148844a995a8..ddd385f2a985 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -54,8 +54,8 @@ struct sugov_cpu { struct sugov_policy *sg_policy; bool iowait_boost_pending; - unsigned long iowait_boost; - unsigned long iowait_boost_max; + unsigned int iowait_boost; + unsigned int iowait_boost_max; u64 last_update; /* The fields below are only needed when sharing a policy. */ @@ -196,7 +196,7 @@ static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, unsigned long *max) { - unsigned long boost_util, boost_max; + unsigned int boost_util, boost_max; if (!sg_cpu->iowait_boost) return; From 674e75411fc260b0d4532701228cfe12fc090da8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Jul 2017 12:16:38 +0530 Subject: [PATCH 3/8] sched: cpufreq: Allow remote cpufreq callbacks With Android UI and benchmarks the latency of cpufreq response to certain scheduling events can become very critical. Currently, callbacks into cpufreq governors are only made from the scheduler if the target CPU of the event is the same as the current CPU. This means there are certain situations where a target CPU may not run the cpufreq governor for some time. One testcase to show this behavior is where a task starts running on CPU0, then a new task is also spawned on CPU0 by a task on CPU1. If the system is configured such that the new tasks should receive maximum demand initially, this should result in CPU0 increasing frequency immediately. But because of the above mentioned limitation though, this does not occur. This patch updates the scheduler core to call the cpufreq callbacks for remote CPUs as well. The schedutil, ondemand and conservative governors are updated to process cpufreq utilization update hooks called for remote CPUs where the remote CPU is managed by the cpufreq policy of the local CPU. The intel_pstate driver is updated to always reject remote callbacks. This is tested with couple of usecases (Android: hackbench, recentfling, galleryfling, vellamo, Ubuntu: hackbench) on ARM hikey board (64 bit octa-core, single policy). Only galleryfling showed minor improvements, while others didn't had much deviation. The reason being that this patch only targets a corner case, where following are required to be true to improve performance and that doesn't happen too often with these tests: - Task is migrated to another CPU. - The task has high demand, and should take the target CPU to higher OPPs. - And the target CPU doesn't call into the cpufreq governor until the next tick. Based on initial work from Steve Muckle. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 3 +++ drivers/cpufreq/intel_pstate.c | 8 ++++++++ include/linux/cpufreq.h | 9 +++++++++ kernel/sched/cpufreq_schedutil.c | 31 +++++++++++++++++++++++++----- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 8 +++++--- kernel/sched/rt.c | 2 +- kernel/sched/sched.h | 10 ++-------- 8 files changed, 55 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 47e24b5384b3..ce5f3ec7ce71 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -275,6 +275,9 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; u64 delta_ns, lst; + if (!cpufreq_can_do_remote_dvfs(policy_dbs->policy)) + return; + /* * The work may not be allowed to be queued up right now. * Possible reasons: diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6cd503525638..d299b86a5a00 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1747,6 +1747,10 @@ static void intel_pstate_update_util_pid(struct update_util_data *data, struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns = time - cpu->sample.time; + /* Don't allow remote callbacks */ + if (smp_processor_id() != cpu->cpu) + return; + if ((s64)delta_ns < pid_params.sample_rate_ns) return; @@ -1764,6 +1768,10 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns; + /* Don't allow remote callbacks */ + if (smp_processor_id() != cpu->cpu) + return; + if (flags & SCHED_CPUFREQ_IOWAIT) { cpu->iowait_boost = int_tofp(1); } else if (cpu->iowait_boost) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f10a9b3761cd..c4035964e6b3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -562,6 +562,15 @@ struct governor_attr { size_t count); }; +static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) +{ + /* Allow remote callbacks only on the CPUs sharing cpufreq policy */ + if (cpumask_test_cpu(smp_processor_id(), policy->cpus)) + return true; + + return false; +} + /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index ddd385f2a985..7dbc76801f86 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -52,6 +52,7 @@ struct sugov_policy { struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; + unsigned int cpu; bool iowait_boost_pending; unsigned int iowait_boost; @@ -77,6 +78,21 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) { s64 delta_ns; + /* + * Since cpufreq_update_util() is called with rq->lock held for + * the @target_cpu, our per-cpu data is fully serialized. + * + * However, drivers cannot in general deal with cross-cpu + * requests, so while get_next_freq() will work, our + * sugov_update_commit() call may not. + * + * Hence stop here for remote requests if they aren't supported + * by the hardware, as calculating the frequency is pointless if + * we cannot in fact act on it. + */ + if (!cpufreq_can_do_remote_dvfs(sg_policy->policy)) + return false; + if (sg_policy->work_in_progress) return false; @@ -155,12 +171,12 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, return cpufreq_driver_resolve_freq(policy, freq); } -static void sugov_get_util(unsigned long *util, unsigned long *max) +static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu) { - struct rq *rq = this_rq(); + struct rq *rq = cpu_rq(cpu); unsigned long cfs_max; - cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); + cfs_max = arch_scale_cpu_capacity(NULL, cpu); *util = min(rq->cfs.avg.util_avg, cfs_max); *max = cfs_max; @@ -254,7 +270,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (flags & SCHED_CPUFREQ_RT_DL) { next_f = policy->cpuinfo.max_freq; } else { - sugov_get_util(&util, &max); + sugov_get_util(&util, &max, sg_cpu->cpu); sugov_iowait_boost(sg_cpu, &util, &max); next_f = get_next_freq(sg_policy, util, max); /* @@ -316,7 +332,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, unsigned long util, max; unsigned int next_f; - sugov_get_util(&util, &max); + sugov_get_util(&util, &max, sg_cpu->cpu); raw_spin_lock(&sg_policy->update_lock); @@ -697,6 +713,11 @@ struct cpufreq_governor *cpufreq_default_governor(void) static int __init sugov_register(void) { + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(sugov_cpu, cpu).cpu = cpu; + return cpufreq_register_governor(&schedutil_gov); } fs_initcall(sugov_register); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 755bd3f1a1a9..5c3bf4bd0327 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1136,7 +1136,7 @@ static void update_curr_dl(struct rq *rq) } /* kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL); + cpufreq_update_util(rq, SCHED_CPUFREQ_DL); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c95880e216f6..d378d02fdfcb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3278,7 +3278,9 @@ static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {} static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) { - if (&this_rq()->cfs == cfs_rq) { + struct rq *rq = rq_of(cfs_rq); + + if (&rq->cfs == cfs_rq) { /* * There are a few boundary cases this might miss but it should * get called often enough that that should (hopefully) not be @@ -3295,7 +3297,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * * See cpu_util(). */ - cpufreq_update_util(rq_of(cfs_rq), 0); + cpufreq_update_util(rq, 0); } } @@ -4875,7 +4877,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * passed. */ if (p->in_iowait) - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); + cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); for_each_sched_entity(se) { if (se->on_rq) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 45caf937ef90..0af5ca9e3e3f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -970,7 +970,7 @@ static void update_curr_rt(struct rq *rq) return; /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); + cpufreq_update_util(rq, SCHED_CPUFREQ_RT); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index eeef1a3086d1..aa9d5b87b4f8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2070,19 +2070,13 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { struct update_util_data *data; - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, + cpu_of(rq))); if (data) data->func(data, rq_clock(rq), flags); } - -static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) -{ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_update_util(rq, flags); -} #else static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity From 99d14d0e16fadb4de001bacc0ac0786a9ebecf2a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Jul 2017 12:16:39 +0530 Subject: [PATCH 4/8] cpufreq: Process remote callbacks from any CPU if the platform permits On many platforms, CPUs can do DVFS across cpufreq policies. i.e CPU from policy-A can change frequency of CPUs belonging to policy-B. This is quite common in case of ARM platforms where we don't configure any per-cpu register. Add a flag to identify such platforms and update cpufreq_can_do_remote_dvfs() to allow remote callbacks if this flag is set. Also enable the flag for cpufreq-dt driver which is used only on ARM platforms currently. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 1 + include/linux/cpufreq.h | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index fef3c2160691..d83ab94d041a 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -274,6 +274,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) transition_latency = CPUFREQ_ETERNAL; policy->cpuinfo.transition_latency = transition_latency; + policy->dvfs_possible_from_any_cpu = true; return 0; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c4035964e6b3..1981a4a167ce 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -127,6 +127,15 @@ struct cpufreq_policy { */ unsigned int transition_delay_us; + /* + * Remote DVFS flag (Not added to the driver structure as we don't want + * to access another structure from scheduler hotpath). + * + * Should be set if CPUs can do DVFS on behalf of other CPUs from + * different cpufreq policies. + */ + bool dvfs_possible_from_any_cpu; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; int cached_resolved_idx; @@ -564,8 +573,13 @@ struct governor_attr { static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) { - /* Allow remote callbacks only on the CPUs sharing cpufreq policy */ - if (cpumask_test_cpu(smp_processor_id(), policy->cpus)) + /* + * Allow remote callbacks if: + * - dvfs_possible_from_any_cpu flag is set + * - the local and remote CPUs share cpufreq policy + */ + if (policy->dvfs_possible_from_any_cpu || + cpumask_test_cpu(smp_processor_id(), policy->cpus)) return true; return false; From d6344d4b5628052ccda104466e5e05b9c43d7b61 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 4 Aug 2017 14:57:39 +0200 Subject: [PATCH 5/8] cpufreq: Simplify cpufreq_can_do_remote_dvfs() The if () in cpufreq_can_do_remote_dvfs() is superfluous, so drop it and simply return the value of the expression under it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1981a4a167ce..d69464139f67 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -578,11 +578,8 @@ static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) * - dvfs_possible_from_any_cpu flag is set * - the local and remote CPUs share cpufreq policy */ - if (policy->dvfs_possible_from_any_cpu || - cpumask_test_cpu(smp_processor_id(), policy->cpus)) - return true; - - return false; + return policy->dvfs_possible_from_any_cpu || + cpumask_test_cpu(smp_processor_id(), policy->cpus); } /********************************************************************* From 209887e6b974c22328487b55d0f390522b014b03 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 9 Aug 2017 10:21:46 +0530 Subject: [PATCH 6/8] cpufreq: Return 0 from ->fast_switch() on errors CPUFREQ_ENTRY_INVALID is a special symbol which is used to specify that an entry in the cpufreq table is invalid. But using it outside of the scope of the cpufreq table looks a bit incorrect. We can represent an invalid frequency by writing it as 0 instead if we need. Note that it is already done that way for the return value of the ->get() callback. Lets do the same for ->fast_switch() and not use CPUFREQ_ENTRY_INVALID outside of the scope of cpufreq table. Also update the comment over cpufreq_driver_fast_switch() to clearly mention what this returns. None of the drivers return CPUFREQ_ENTRY_INVALID as of now from ->fast_switch() callback and so we don't need to update any of those. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 7 ++++--- kernel/sched/cpufreq_schedutil.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9bf97a366029..0c728190e444 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1817,9 +1817,10 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier); * twice in parallel for the same policy and that it will never be called in * parallel with either ->target() or ->target_index() for the same policy. * - * If CPUFREQ_ENTRY_INVALID is returned by the driver's ->fast_switch() - * callback to indicate an error condition, the hardware configuration must be - * preserved. + * Returns the actual frequency set for the CPU. + * + * If 0 is returned by the driver's ->fast_switch() callback to indicate an + * error condition, the hardware configuration must be preserved. */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 7dbc76801f86..2ba04bb3182a 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -123,7 +123,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, if (policy->fast_switch_enabled) { next_freq = cpufreq_driver_fast_switch(policy, next_freq); - if (next_freq == CPUFREQ_ENTRY_INVALID) + if (!next_freq) return; policy->cur = next_freq; From e2cabe48c20efb174ce0c01190f8b9c5f3ea1d13 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 10 Aug 2017 09:50:55 +0530 Subject: [PATCH 7/8] cpufreq: schedutil: Don't restrict kthread to related_cpus unnecessarily Utilization update callbacks are now processed remotely, even on the CPUs that don't share cpufreq policy with the target CPU (if dvfs_possible_from_any_cpu flag is set). But in non-fast switch paths, the frequency is changed only from one of policy->related_cpus. This happens because the kthread which does the actual update is bound to a subset of CPUs (i.e. related_cpus). Allow frequency to be remotely updated as well (i.e. call __cpufreq_driver_target()) if dvfs_possible_from_any_cpu flag is set. Reported-by: Pavan Kondeti Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 2ba04bb3182a..69571ee6a175 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -487,7 +487,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) } sg_policy->thread = thread; - kthread_bind_mask(thread, policy->related_cpus); + + /* Kthread is bound to all CPUs by default */ + if (!policy->dvfs_possible_from_any_cpu) + kthread_bind_mask(thread, policy->related_cpus); + init_irq_work(&sg_policy->irq_work, sugov_irq_work); mutex_init(&sg_policy->work_lock); From c49cbc19b31e069cb344921c7286d7549767d10e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 14 Aug 2017 14:50:16 +0530 Subject: [PATCH 8/8] cpufreq: schedutil: Always process remote callback with slow switching The frequency update from the utilization update handlers can be divided into two parts: (A) Finding the next frequency (B) Updating the frequency While any CPU can do (A), (B) can be restricted to a group of CPUs only, depending on the current platform. For platforms where fast cpufreq switching is possible, both (A) and (B) are always done from the same CPU and that CPU should be capable of changing the frequency of the target CPU. But for platforms where fast cpufreq switching isn't possible, after doing (A) we wake up a kthread which will eventually do (B). This kthread is already bound to the right set of CPUs, i.e. only those which can change the frequency of CPUs of a cpufreq policy. And so any CPU can actually do (A) in this case, as the frequency is updated from the right set of CPUs only. Check cpufreq_can_do_remote_dvfs() only for the fast switching case. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 69571ee6a175..a07f17a5f38f 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -84,13 +84,18 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) * * However, drivers cannot in general deal with cross-cpu * requests, so while get_next_freq() will work, our - * sugov_update_commit() call may not. + * sugov_update_commit() call may not for the fast switching platforms. * * Hence stop here for remote requests if they aren't supported * by the hardware, as calculating the frequency is pointless if * we cannot in fact act on it. + * + * For the slow switching platforms, the kthread is always scheduled on + * the right set of CPUs and any CPU can find the next frequency and + * schedule the kthread. */ - if (!cpufreq_can_do_remote_dvfs(sg_policy->policy)) + if (sg_policy->policy->fast_switch_enabled && + !cpufreq_can_do_remote_dvfs(sg_policy->policy)) return false; if (sg_policy->work_in_progress)