summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- MAINTAINERS 9
-rw-r--r-- drivers/base/power/main.c 4
-rw-r--r-- drivers/base/power/runtime.c 56
-rw-r--r-- drivers/powercap/Kconfig 2
-rw-r--r-- include/linux/energy_model.h 20
-rw-r--r-- include/linux/pm_runtime.h 17
-rw-r--r-- kernel/power/Kconfig 3
-rw-r--r-- kernel/power/energy_model.c 67
8 files changed, 92 insertions, 86 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f89a33a48d11..c16fce978ea1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8528,6 +8528,15 @@ M: Maxim Levitsky <maximlevitsky@gmail.com>
S: Maintained
F: drivers/media/rc/ene_ir.*
+ENERGY MODEL
+M: Lukasz Luba <lukasz.luba@arm.com>
+M: "Rafael J. Wysocki" <rafael@kernel.org>
+L: linux-pm@vger.kernel.org
+S: Maintained
+F: kernel/power/energy_model.c
+F: include/linux/energy_model.h
+F: Documentation/power/energy-model.rst
+
EPAPR HYPERVISOR BYTE CHANNEL DEVICE DRIVER
M: Laurentiu Tudor <laurentiu.tudor@nxp.com>
L: linuxppc-dev@lists.ozlabs.org
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 40e1d8d8a589..dffa2aa1ba7d 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1404,6 +1404,10 @@ static int device_suspend_late(struct device *dev, pm_message_t state, bool asyn
TRACE_DEVICE(dev);
TRACE_SUSPEND(0);
+ /*
+ * Disable runtime PM for the device without checking if there is a
+ * pending resume request for it.
+ */
__pm_runtime_disable(dev, false);
dpm_wait_for_subordinate(dev, async);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 2ee45841486b..da74e1c69f7a 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -448,8 +448,19 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev)
retval = __rpm_callback(cb, dev);
}
- dev->power.runtime_error = retval;
- return retval != -EACCES ? retval : -EIO;
+ /*
+ * Since -EACCES means that runtime PM is disabled for the given device,
+ * it should not be returned by runtime PM callbacks. If it is returned
+ * nevertheless, assume it to be a transient error and convert it to
+ * -EAGAIN.
+ */
+ if (retval == -EACCES)
+ retval = -EAGAIN;
+
+ if (retval != -EAGAIN && retval != -EBUSY)
+ dev->power.runtime_error = retval;
+
+ return retval;
}
/**
@@ -725,21 +736,18 @@ static int rpm_suspend(struct device *dev, int rpmflags)
dev->power.deferred_resume = false;
wake_up_all(&dev->power.wait_queue);
- if (retval == -EAGAIN || retval == -EBUSY) {
- dev->power.runtime_error = 0;
+ /*
+ * On transient errors, if the callback routine failed an autosuspend,
+ * and if the last_busy time has been updated so that there is a new
+ * autosuspend expiration time, automatically reschedule another
+ * autosuspend.
+ */
+ if (!dev->power.runtime_error && (rpmflags & RPM_AUTO) &&
+ pm_runtime_autosuspend_expiration(dev) != 0)
+ goto repeat;
+
+ pm_runtime_cancel_pending(dev);
- /*
- * If the callback routine failed an autosuspend, and
- * if the last_busy time has been updated so that there
- * is a new autosuspend expiration time, automatically
- * reschedule another autosuspend.
- */
- if ((rpmflags & RPM_AUTO) &&
- pm_runtime_autosuspend_expiration(dev) != 0)
- goto repeat;
- } else {
- pm_runtime_cancel_pending(dev);
- }
goto out;
}
@@ -1460,20 +1468,6 @@ int pm_runtime_barrier(struct device *dev)
}
EXPORT_SYMBOL_GPL(pm_runtime_barrier);
-/**
- * __pm_runtime_disable - Disable runtime PM of a device.
- * @dev: Device to handle.
- * @check_resume: If set, check if there's a resume request for the device.
- *
- * Increment power.disable_depth for the device and if it was zero previously,
- * cancel all pending runtime PM requests for the device and wait for all
- * operations in progress to complete. The device can be either active or
- * suspended after its runtime PM has been disabled.
- *
- * If @check_resume is set and there's a resume request pending when
- * __pm_runtime_disable() is called and power.disable_depth is zero, the
- * function will wake up the device before disabling its runtime PM.
- */
void __pm_runtime_disable(struct device *dev, bool check_resume)
{
spin_lock_irq(&dev->power.lock);
@@ -1959,7 +1953,7 @@ int pm_runtime_force_resume(struct device *dev)
int (*callback)(struct device *);
int ret = 0;
- if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume)
+ if (!dev->power.needs_force_resume)
goto out;
/*
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 69ef8d081c98..03c4c796d993 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -82,7 +82,7 @@ config DTPM
config DTPM_CPU
bool "Add CPU power capping based on the energy model"
- depends on DTPM && ENERGY_MODEL
+ depends on DTPM && ENERGY_MODEL && SMP
help
This enables support for CPU power limitation based on
energy model.
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 78318d49276d..ddd09debfc7d 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -167,13 +167,13 @@ struct em_data_callback {
struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
int em_dev_update_perf_domain(struct device *dev,
- struct em_perf_table __rcu *new_table);
+ struct em_perf_table *new_table);
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
- struct em_data_callback *cb, cpumask_t *span,
- bool microwatts);
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts);
void em_dev_unregister_perf_domain(struct device *dev);
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd);
-void em_table_free(struct em_perf_table __rcu *table);
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd);
+void em_table_free(struct em_perf_table *table);
int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
int nr_states);
int em_dev_update_chip_binning(struct device *dev);
@@ -346,8 +346,8 @@ struct em_data_callback {};
static inline
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
- struct em_data_callback *cb, cpumask_t *span,
- bool microwatts)
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts)
{
return -EINVAL;
}
@@ -373,14 +373,14 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
return 0;
}
static inline
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
{
return NULL;
}
-static inline void em_table_free(struct em_perf_table __rcu *table) {}
+static inline void em_table_free(struct em_perf_table *table) {}
static inline
int em_dev_update_perf_domain(struct device *dev,
- struct em_perf_table __rcu *new_table)
+ struct em_perf_table *new_table)
{
return -EINVAL;
}
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d39dc863f612..72c62e1171ca 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -556,11 +556,18 @@ static inline int pm_runtime_set_suspended(struct device *dev)
* pm_runtime_disable - Disable runtime PM for a device.
* @dev: Target device.
*
- * Prevent the runtime PM framework from working with @dev (by incrementing its
- * "blocking" counter).
- *
- * For each invocation of this function for @dev there must be a matching
- * pm_runtime_enable() call in order for runtime PM to be enabled for it.
+ * Prevent the runtime PM framework from working with @dev by incrementing its
+ * "disable" counter.
+ *
+ * If the counter is zero when this function runs and there is a pending runtime
+ * resume request for @dev, it will be resumed. If the counter is still zero at
+ * that point, all of the pending runtime PM requests for @dev will be canceled
+ * and all runtime PM operations in progress involving it will be waited for to
+ * complete.
+ *
+ * For each invocation of this function for @dev, there must be a matching
+ * pm_runtime_enable() call, so that runtime PM is eventually enabled for it
+ * again.
*/
static inline void pm_runtime_disable(struct device *dev)
{
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ca947ed32e3d..54a623680019 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -380,8 +380,7 @@ config CPU_PM
config ENERGY_MODEL
bool "Energy Model for devices with DVFS (CPUs, GPUs, etc)"
- depends on SMP
- depends on CPU_FREQ
+ depends on CPU_FREQ || PM_DEVFREQ
help
Several subsystems (thermal and/or the task scheduler for example)
can leverage information about the energy consumed by devices to
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 3874f0e97651..d9b7e2b38c7a 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -161,22 +161,10 @@ static void em_debug_create_pd(struct device *dev) {}
static void em_debug_remove_pd(struct device *dev) {}
#endif
-static void em_destroy_table_rcu(struct rcu_head *rp)
-{
- struct em_perf_table __rcu *table;
-
- table = container_of(rp, struct em_perf_table, rcu);
- kfree(table);
-}
-
static void em_release_table_kref(struct kref *kref)
{
- struct em_perf_table __rcu *table;
-
/* It was the last owner of this table so we can free */
- table = container_of(kref, struct em_perf_table, kref);
-
- call_rcu(&table->rcu, em_destroy_table_rcu);
+ kfree_rcu(container_of(kref, struct em_perf_table, kref), rcu);
}
/**
@@ -185,7 +173,7 @@ static void em_release_table_kref(struct kref *kref)
*
* No return values.
*/
-void em_table_free(struct em_perf_table __rcu *table)
+void em_table_free(struct em_perf_table *table)
{
kref_put(&table->kref, em_release_table_kref);
}
@@ -198,9 +186,9 @@ void em_table_free(struct em_perf_table __rcu *table)
* has a user.
* Returns allocated table or NULL.
*/
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
{
- struct em_perf_table __rcu *table;
+ struct em_perf_table *table;
int table_size;
table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
@@ -239,7 +227,7 @@ static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
}
static int em_compute_costs(struct device *dev, struct em_perf_state *table,
- struct em_data_callback *cb, int nr_states,
+ const struct em_data_callback *cb, int nr_states,
unsigned long flags)
{
unsigned long prev_cost = ULONG_MAX;
@@ -308,9 +296,9 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
* Return 0 on success or an error code on failure.
*/
int em_dev_update_perf_domain(struct device *dev,
- struct em_perf_table __rcu *new_table)
+ struct em_perf_table *new_table)
{
- struct em_perf_table __rcu *old_table;
+ struct em_perf_table *old_table;
struct em_perf_domain *pd;
if (!dev)
@@ -327,7 +315,8 @@ int em_dev_update_perf_domain(struct device *dev,
kref_get(&new_table->kref);
- old_table = pd->em_table;
+ old_table = rcu_dereference_protected(pd->em_table,
+ lockdep_is_held(&em_pd_mutex));
rcu_assign_pointer(pd->em_table, new_table);
em_cpufreq_update_efficiencies(dev, new_table->state);
@@ -341,7 +330,7 @@ EXPORT_SYMBOL_GPL(em_dev_update_perf_domain);
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
struct em_perf_state *table,
- struct em_data_callback *cb,
+ const struct em_data_callback *cb,
unsigned long flags)
{
unsigned long power, freq, prev_freq = 0;
@@ -396,10 +385,11 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
}
static int em_create_pd(struct device *dev, int nr_states,
- struct em_data_callback *cb, cpumask_t *cpus,
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus,
unsigned long flags)
{
- struct em_perf_table __rcu *em_table;
+ struct em_perf_table *em_table;
struct em_perf_domain *pd;
struct device *cpu_dev;
int cpu, ret, num_cpus;
@@ -556,9 +546,10 @@ EXPORT_SYMBOL_GPL(em_cpu_get);
* Return 0 on success
*/
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
- struct em_data_callback *cb, cpumask_t *cpus,
- bool microwatts)
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts)
{
+ struct em_perf_table *em_table;
unsigned long cap, prev_cap = 0;
unsigned long flags = 0;
int cpu, ret;
@@ -631,7 +622,9 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
dev->em_pd->min_perf_state = 0;
dev->em_pd->max_perf_state = nr_states - 1;
- em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state);
+ em_table = rcu_dereference_protected(dev->em_pd->em_table,
+ lockdep_is_held(&em_pd_mutex));
+ em_cpufreq_update_efficiencies(dev, em_table->state);
em_debug_create_pd(dev);
dev_info(dev, "EM: created perf domain\n");
@@ -668,7 +661,8 @@ void em_dev_unregister_perf_domain(struct device *dev)
mutex_lock(&em_pd_mutex);
em_debug_remove_pd(dev);
- em_table_free(dev->em_pd->em_table);
+ em_table_free(rcu_dereference_protected(dev->em_pd->em_table,
+ lockdep_is_held(&em_pd_mutex)));
kfree(dev->em_pd);
dev->em_pd = NULL;
@@ -676,9 +670,9 @@ void em_dev_unregister_perf_domain(struct device *dev)
}
EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
-static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd)
+static struct em_perf_table *em_table_dup(struct em_perf_domain *pd)
{
- struct em_perf_table __rcu *em_table;
+ struct em_perf_table *em_table;
struct em_perf_state *ps, *new_ps;
int ps_size;
@@ -700,7 +694,7 @@ static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd)
}
static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd,
- struct em_perf_table __rcu *em_table)
+ struct em_perf_table *em_table)
{
int ret;
@@ -728,10 +722,9 @@ free_em_table:
* are correctly calculated.
*/
static void em_adjust_new_capacity(struct device *dev,
- struct em_perf_domain *pd,
- u64 max_cap)
+ struct em_perf_domain *pd)
{
- struct em_perf_table __rcu *em_table;
+ struct em_perf_table *em_table;
em_table = em_table_dup(pd);
if (!em_table) {
@@ -775,7 +768,8 @@ static void em_check_capacity_update(void)
}
cpufreq_cpu_put(policy);
- pd = em_cpu_get(cpu);
+ dev = get_cpu_device(cpu);
+ pd = em_pd_get(dev);
if (!pd || em_is_artificial(pd))
continue;
@@ -799,8 +793,7 @@ static void em_check_capacity_update(void)
pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n",
cpu, cpu_capacity, em_max_perf);
- dev = get_cpu_device(cpu);
- em_adjust_new_capacity(dev, pd, cpu_capacity);
+ em_adjust_new_capacity(dev, pd);
}
free_cpumask_var(cpu_done_mask);
@@ -822,7 +815,7 @@ static void em_update_workfn(struct work_struct *work)
*/
int em_dev_update_chip_binning(struct device *dev)
{
- struct em_perf_table __rcu *em_table;
+ struct em_perf_table *em_table;
struct em_perf_domain *pd;
int i, ret;