diff options
-rw-r--r-- | MAINTAINERS | 9 | ||||
-rw-r--r-- | drivers/base/power/main.c | 4 | ||||
-rw-r--r-- | drivers/base/power/runtime.c | 56 | ||||
-rw-r--r-- | drivers/powercap/Kconfig | 2 | ||||
-rw-r--r-- | include/linux/energy_model.h | 20 | ||||
-rw-r--r-- | include/linux/pm_runtime.h | 17 | ||||
-rw-r--r-- | kernel/power/Kconfig | 3 | ||||
-rw-r--r-- | kernel/power/energy_model.c | 67 |
8 files changed, 92 insertions, 86 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index f89a33a48d11..c16fce978ea1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8528,6 +8528,15 @@ M: Maxim Levitsky <maximlevitsky@gmail.com> S: Maintained F: drivers/media/rc/ene_ir.* +ENERGY MODEL +M: Lukasz Luba <lukasz.luba@arm.com> +M: "Rafael J. Wysocki" <rafael@kernel.org> +L: linux-pm@vger.kernel.org +S: Maintained +F: kernel/power/energy_model.c +F: include/linux/energy_model.h +F: Documentation/power/energy-model.rst + EPAPR HYPERVISOR BYTE CHANNEL DEVICE DRIVER M: Laurentiu Tudor <laurentiu.tudor@nxp.com> L: linuxppc-dev@lists.ozlabs.org diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 40e1d8d8a589..dffa2aa1ba7d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1404,6 +1404,10 @@ static int device_suspend_late(struct device *dev, pm_message_t state, bool asyn TRACE_DEVICE(dev); TRACE_SUSPEND(0); + /* + * Disable runtime PM for the device without checking if there is a + * pending resume request for it. + */ __pm_runtime_disable(dev, false); dpm_wait_for_subordinate(dev, async); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 2ee45841486b..da74e1c69f7a 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -448,8 +448,19 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) retval = __rpm_callback(cb, dev); } - dev->power.runtime_error = retval; - return retval != -EACCES ? retval : -EIO; + /* + * Since -EACCES means that runtime PM is disabled for the given device, + * it should not be returned by runtime PM callbacks. If it is returned + * nevertheless, assume it to be a transient error and convert it to + * -EAGAIN. + */ + if (retval == -EACCES) + retval = -EAGAIN; + + if (retval != -EAGAIN && retval != -EBUSY) + dev->power.runtime_error = retval; + + return retval; } /** @@ -725,21 +736,18 @@ static int rpm_suspend(struct device *dev, int rpmflags) dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); - if (retval == -EAGAIN || retval == -EBUSY) { - dev->power.runtime_error = 0; + /* + * On transient errors, if the callback routine failed an autosuspend, + * and if the last_busy time has been updated so that there is a new + * autosuspend expiration time, automatically reschedule another + * autosuspend. + */ + if (!dev->power.runtime_error && (rpmflags & RPM_AUTO) && + pm_runtime_autosuspend_expiration(dev) != 0) + goto repeat; + + pm_runtime_cancel_pending(dev); - /* - * If the callback routine failed an autosuspend, and - * if the last_busy time has been updated so that there - * is a new autosuspend expiration time, automatically - * reschedule another autosuspend. - */ - if ((rpmflags & RPM_AUTO) && - pm_runtime_autosuspend_expiration(dev) != 0) - goto repeat; - } else { - pm_runtime_cancel_pending(dev); - } goto out; } @@ -1460,20 +1468,6 @@ int pm_runtime_barrier(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_barrier); -/** - * __pm_runtime_disable - Disable runtime PM of a device. - * @dev: Device to handle. - * @check_resume: If set, check if there's a resume request for the device. - * - * Increment power.disable_depth for the device and if it was zero previously, - * cancel all pending runtime PM requests for the device and wait for all - * operations in progress to complete. The device can be either active or - * suspended after its runtime PM has been disabled. - * - * If @check_resume is set and there's a resume request pending when - * __pm_runtime_disable() is called and power.disable_depth is zero, the - * function will wake up the device before disabling its runtime PM. - */ void __pm_runtime_disable(struct device *dev, bool check_resume) { spin_lock_irq(&dev->power.lock); @@ -1959,7 +1953,7 @@ int pm_runtime_force_resume(struct device *dev) int (*callback)(struct device *); int ret = 0; - if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume) + if (!dev->power.needs_force_resume) goto out; /* diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 69ef8d081c98..03c4c796d993 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -82,7 +82,7 @@ config DTPM config DTPM_CPU bool "Add CPU power capping based on the energy model" - depends on DTPM && ENERGY_MODEL + depends on DTPM && ENERGY_MODEL && SMP help This enables support for CPU power limitation based on energy model. diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 78318d49276d..ddd09debfc7d 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -167,13 +167,13 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table); + struct em_perf_table *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span, - bool microwatts); + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); -void em_table_free(struct em_perf_table __rcu *table); +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd); +void em_table_free(struct em_perf_table *table); int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int nr_states); int em_dev_update_chip_binning(struct device *dev); @@ -346,8 +346,8 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span, - bool microwatts) + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) { return -EINVAL; } @@ -373,14 +373,14 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) return 0; } static inline -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd) { return NULL; } -static inline void em_table_free(struct em_perf_table __rcu *table) {} +static inline void em_table_free(struct em_perf_table *table) {} static inline int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table) + struct em_perf_table *new_table) { return -EINVAL; } diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d39dc863f612..72c62e1171ca 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -556,11 +556,18 @@ static inline int pm_runtime_set_suspended(struct device *dev) * pm_runtime_disable - Disable runtime PM for a device. * @dev: Target device. * - * Prevent the runtime PM framework from working with @dev (by incrementing its - * "blocking" counter). - * - * For each invocation of this function for @dev there must be a matching - * pm_runtime_enable() call in order for runtime PM to be enabled for it. + * Prevent the runtime PM framework from working with @dev by incrementing its + * "disable" counter. + * + * If the counter is zero when this function runs and there is a pending runtime + * resume request for @dev, it will be resumed. If the counter is still zero at + * that point, all of the pending runtime PM requests for @dev will be canceled + * and all runtime PM operations in progress involving it will be waited for to + * complete. + * + * For each invocation of this function for @dev, there must be a matching + * pm_runtime_enable() call, so that runtime PM is eventually enabled for it + * again. */ static inline void pm_runtime_disable(struct device *dev) { diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index ca947ed32e3d..54a623680019 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -380,8 +380,7 @@ config CPU_PM config ENERGY_MODEL bool "Energy Model for devices with DVFS (CPUs, GPUs, etc)" - depends on SMP - depends on CPU_FREQ + depends on CPU_FREQ || PM_DEVFREQ help Several subsystems (thermal and/or the task scheduler for example) can leverage information about the energy consumed by devices to diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 3874f0e97651..d9b7e2b38c7a 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -161,22 +161,10 @@ static void em_debug_create_pd(struct device *dev) {} static void em_debug_remove_pd(struct device *dev) {} #endif -static void em_destroy_table_rcu(struct rcu_head *rp) -{ - struct em_perf_table __rcu *table; - - table = container_of(rp, struct em_perf_table, rcu); - kfree(table); -} - static void em_release_table_kref(struct kref *kref) { - struct em_perf_table __rcu *table; - /* It was the last owner of this table so we can free */ - table = container_of(kref, struct em_perf_table, kref); - - call_rcu(&table->rcu, em_destroy_table_rcu); + kfree_rcu(container_of(kref, struct em_perf_table, kref), rcu); } /** @@ -185,7 +173,7 @@ static void em_release_table_kref(struct kref *kref) * * No return values. */ -void em_table_free(struct em_perf_table __rcu *table) +void em_table_free(struct em_perf_table *table) { kref_put(&table->kref, em_release_table_kref); } @@ -198,9 +186,9 @@ void em_table_free(struct em_perf_table __rcu *table) * has a user. * Returns allocated table or NULL. */ -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd) { - struct em_perf_table __rcu *table; + struct em_perf_table *table; int table_size; table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; @@ -239,7 +227,7 @@ static void em_init_performance(struct device *dev, struct em_perf_domain *pd, } static int em_compute_costs(struct device *dev, struct em_perf_state *table, - struct em_data_callback *cb, int nr_states, + const struct em_data_callback *cb, int nr_states, unsigned long flags) { unsigned long prev_cost = ULONG_MAX; @@ -308,9 +296,9 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, * Return 0 on success or an error code on failure. */ int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table) + struct em_perf_table *new_table) { - struct em_perf_table __rcu *old_table; + struct em_perf_table *old_table; struct em_perf_domain *pd; if (!dev) @@ -327,7 +315,8 @@ int em_dev_update_perf_domain(struct device *dev, kref_get(&new_table->kref); - old_table = pd->em_table; + old_table = rcu_dereference_protected(pd->em_table, + lockdep_is_held(&em_pd_mutex)); rcu_assign_pointer(pd->em_table, new_table); em_cpufreq_update_efficiencies(dev, new_table->state); @@ -341,7 +330,7 @@ EXPORT_SYMBOL_GPL(em_dev_update_perf_domain); static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, struct em_perf_state *table, - struct em_data_callback *cb, + const struct em_data_callback *cb, unsigned long flags) { unsigned long power, freq, prev_freq = 0; @@ -396,10 +385,11 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, } static int em_create_pd(struct device *dev, int nr_states, - struct em_data_callback *cb, cpumask_t *cpus, + const struct em_data_callback *cb, + const cpumask_t *cpus, unsigned long flags) { - struct em_perf_table __rcu *em_table; + struct em_perf_table *em_table; struct em_perf_domain *pd; struct device *cpu_dev; int cpu, ret, num_cpus; @@ -556,9 +546,10 @@ EXPORT_SYMBOL_GPL(em_cpu_get); * Return 0 on success */ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *cpus, - bool microwatts) + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) { + struct em_perf_table *em_table; unsigned long cap, prev_cap = 0; unsigned long flags = 0; int cpu, ret; @@ -631,7 +622,9 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, dev->em_pd->min_perf_state = 0; dev->em_pd->max_perf_state = nr_states - 1; - em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state); + em_table = rcu_dereference_protected(dev->em_pd->em_table, + lockdep_is_held(&em_pd_mutex)); + em_cpufreq_update_efficiencies(dev, em_table->state); em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); @@ -668,7 +661,8 @@ void em_dev_unregister_perf_domain(struct device *dev) mutex_lock(&em_pd_mutex); em_debug_remove_pd(dev); - em_table_free(dev->em_pd->em_table); + em_table_free(rcu_dereference_protected(dev->em_pd->em_table, + lockdep_is_held(&em_pd_mutex))); kfree(dev->em_pd); dev->em_pd = NULL; @@ -676,9 +670,9 @@ void em_dev_unregister_perf_domain(struct device *dev) } EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain); -static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd) +static struct em_perf_table *em_table_dup(struct em_perf_domain *pd) { - struct em_perf_table __rcu *em_table; + struct em_perf_table *em_table; struct em_perf_state *ps, *new_ps; int ps_size; @@ -700,7 +694,7 @@ static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd) } static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd, - struct em_perf_table __rcu *em_table) + struct em_perf_table *em_table) { int ret; @@ -728,10 +722,9 @@ free_em_table: * are correctly calculated. */ static void em_adjust_new_capacity(struct device *dev, - struct em_perf_domain *pd, - u64 max_cap) + struct em_perf_domain *pd) { - struct em_perf_table __rcu *em_table; + struct em_perf_table *em_table; em_table = em_table_dup(pd); if (!em_table) { @@ -775,7 +768,8 @@ static void em_check_capacity_update(void) } cpufreq_cpu_put(policy); - pd = em_cpu_get(cpu); + dev = get_cpu_device(cpu); + pd = em_pd_get(dev); if (!pd || em_is_artificial(pd)) continue; @@ -799,8 +793,7 @@ static void em_check_capacity_update(void) pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n", cpu, cpu_capacity, em_max_perf); - dev = get_cpu_device(cpu); - em_adjust_new_capacity(dev, pd, cpu_capacity); + em_adjust_new_capacity(dev, pd); } free_cpumask_var(cpu_done_mask); @@ -822,7 +815,7 @@ static void em_update_workfn(struct work_struct *work) */ int em_dev_update_chip_binning(struct device *dev) { - struct em_perf_table __rcu *em_table; + struct em_perf_table *em_table; struct em_perf_domain *pd; int i, ret; |