Diffstat (limited to 'drivers')
 drivers/acpi/acpi_tad.c                                          |  24
 drivers/base/power/generic_ops.c                                 |  85
 drivers/base/power/main.c                                        |  15
 drivers/base/power/runtime.c                                     |  23
 drivers/base/power/trace.c                                       |   4
 drivers/base/power/wakeup.c                                      |  24
 drivers/cpufreq/acpi-cpufreq.c                                   |   2
 drivers/cpufreq/amd-pstate.c                                     |  35
 drivers/cpufreq/cppc_cpufreq.c                                   |  17
 drivers/cpufreq/cpufreq-dt-platdev.c                             |   1
 drivers/cpufreq/cpufreq-nforce2.c                                |   3
 drivers/cpufreq/cpufreq.c                                        |  11
 drivers/cpufreq/intel_pstate.c                                   | 227
 drivers/cpufreq/qcom-cpufreq-nvmem.c                             |  35
 drivers/cpufreq/s5pv210-cpufreq.c                                |   6
 drivers/cpufreq/tegra186-cpufreq.c                               | 150
 drivers/cpufreq/tegra194-cpufreq.c                               |   3
 drivers/cpuidle/cpuidle.c                                        |  12
 drivers/cpuidle/driver.c                                         |  10
 drivers/cpuidle/governor.c                                       |   4
 drivers/cpuidle/governors/menu.c                                 |   9
 drivers/cpuidle/governors/teo.c                                  | 159
 drivers/cpuidle/poll_state.c                                     |   4
 drivers/devfreq/devfreq.c                                        |   2
 drivers/devfreq/governor.h                                       | 127
 drivers/devfreq/governor_passive.c                               |  27
 drivers/devfreq/governor_performance.c                           |   2
 drivers/devfreq/governor_powersave.c                             |   2
 drivers/devfreq/governor_simpleondemand.c                        |   6
 drivers/devfreq/governor_userspace.c                             |   2
 drivers/devfreq/hisi_uncore_freq.c                               |   6
 drivers/devfreq/tegra30-devfreq.c                                |  15
 drivers/opp/core.c                                               |  69
 drivers/opp/cpu.c                                                |  16
 drivers/opp/of.c                                                 | 125
 drivers/pci/pci-sysfs.c                                          |   4
 drivers/pmdomain/core.c                                          |  10
 drivers/pmdomain/governor.c                                      |  33
 drivers/powercap/intel_rapl_common.c                             |  39
 drivers/powercap/intel_rapl_msr.c                                |  43
 drivers/powercap/intel_rapl_tpmi.c                               |   2
 drivers/scsi/mesh.c                                              |   1
 drivers/scsi/stex.c                                              |   1
 drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c   |   2
 drivers/usb/host/sl811-hcd.c                                     |   1
 45 files changed, 764 insertions(+), 634 deletions(-)
diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c
index c9487c5bb7b3..6d870d97ada6 100644
--- a/drivers/acpi/acpi_tad.c
+++ b/drivers/acpi/acpi_tad.c
@@ -90,8 +90,8 @@ static int acpi_tad_set_real_time(struct device *dev, struct acpi_tad_rt *rt)
args[0].buffer.pointer = (u8 *)rt;
args[0].buffer.length = sizeof(*rt);
- ACQUIRE(pm_runtime_active_try, pm)(dev);
- if (ACQUIRE_ERR(pm_runtime_active_try, &pm))
+ PM_RUNTIME_ACQUIRE(dev, pm);
+ if (PM_RUNTIME_ACQUIRE_ERR(&pm))
return -ENXIO;
status = acpi_evaluate_integer(handle, "_SRT", &arg_list, &retval);
@@ -137,8 +137,8 @@ static int acpi_tad_get_real_time(struct device *dev, struct acpi_tad_rt *rt)
{
int ret;
- ACQUIRE(pm_runtime_active_try, pm)(dev);
- if (ACQUIRE_ERR(pm_runtime_active_try, &pm))
+ PM_RUNTIME_ACQUIRE(dev, pm);
+ if (PM_RUNTIME_ACQUIRE_ERR(&pm))
return -ENXIO;
ret = acpi_tad_evaluate_grt(dev, rt);
@@ -275,8 +275,8 @@ static int acpi_tad_wake_set(struct device *dev, char *method, u32 timer_id,
args[0].integer.value = timer_id;
args[1].integer.value = value;
- ACQUIRE(pm_runtime_active_try, pm)(dev);
- if (ACQUIRE_ERR(pm_runtime_active_try, &pm))
+ PM_RUNTIME_ACQUIRE(dev, pm);
+ if (PM_RUNTIME_ACQUIRE_ERR(&pm))
return -ENXIO;
status = acpi_evaluate_integer(handle, method, &arg_list, &retval);
@@ -322,8 +322,8 @@ static ssize_t acpi_tad_wake_read(struct device *dev, char *buf, char *method,
args[0].integer.value = timer_id;
- ACQUIRE(pm_runtime_active_try, pm)(dev);
- if (ACQUIRE_ERR(pm_runtime_active_try, &pm))
+ PM_RUNTIME_ACQUIRE(dev, pm);
+ if (PM_RUNTIME_ACQUIRE_ERR(&pm))
return -ENXIO;
status = acpi_evaluate_integer(handle, method, &arg_list, &retval);
@@ -377,8 +377,8 @@ static int acpi_tad_clear_status(struct device *dev, u32 timer_id)
args[0].integer.value = timer_id;
- ACQUIRE(pm_runtime_active_try, pm)(dev);
- if (ACQUIRE_ERR(pm_runtime_active_try, &pm))
+ PM_RUNTIME_ACQUIRE(dev, pm);
+ if (PM_RUNTIME_ACQUIRE_ERR(&pm))
return -ENXIO;
status = acpi_evaluate_integer(handle, "_CWS", &arg_list, &retval);
@@ -417,8 +417,8 @@ static ssize_t acpi_tad_status_read(struct device *dev, char *buf, u32 timer_id)
args[0].integer.value = timer_id;
- ACQUIRE(pm_runtime_active_try, pm)(dev);
- if (ACQUIRE_ERR(pm_runtime_active_try, &pm))
+ PM_RUNTIME_ACQUIRE(dev, pm);
+ if (PM_RUNTIME_ACQUIRE_ERR(&pm))
return -ENXIO;
status = acpi_evaluate_integer(handle, "_GWS", &arg_list, &retval);
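
Note: the acpi_tad.c hunks above are a mechanical rename of the scope-based
runtime-PM guard. As a hedged sketch (the real PM_RUNTIME_ACQUIRE() and
PM_RUNTIME_ACQUIRE_ERR() definitions live in the PM headers, not in this
diff), wrappers of the following shape would reproduce the substitution
exactly:

/* Illustrative definitions only; not part of this patch. */
#define PM_RUNTIME_ACQUIRE(dev, name) \
	ACQUIRE(pm_runtime_active_try, name)(dev)
#define PM_RUNTIME_ACQUIRE_ERR(name_ptr) \
	ACQUIRE_ERR(pm_runtime_active_try, name_ptr)

Either way, the guard keeps runtime PM from suspending the device for the
duration of the scope, and the _ERR() check turns a failed acquisition into
-ENXIO before any AML is evaluated.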
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 6502720bb564..af99bbcf281c 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -8,6 +8,13 @@
#include <linux/pm_runtime.h>
#include <linux/export.h>
+#define CALL_PM_OP(dev, op) \
+({ \
+ struct device *_dev = (dev); \
+ const struct dev_pm_ops *pm = _dev->driver ? _dev->driver->pm : NULL; \
+ pm && pm->op ? pm->op(_dev) : 0; \
+})
+
#ifdef CONFIG_PM
/**
* pm_generic_runtime_suspend - Generic runtime suspend callback for subsystems.
@@ -19,12 +26,7 @@
*/
int pm_generic_runtime_suspend(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
- int ret;
-
- ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : 0;
-
- return ret;
+ return CALL_PM_OP(dev, runtime_suspend);
}
EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend);
@@ -38,12 +40,7 @@ EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend);
*/
int pm_generic_runtime_resume(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
- int ret;
-
- ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : 0;
-
- return ret;
+ return CALL_PM_OP(dev, runtime_resume);
}
EXPORT_SYMBOL_GPL(pm_generic_runtime_resume);
#endif /* CONFIG_PM */
@@ -72,9 +69,7 @@ int pm_generic_prepare(struct device *dev)
*/
int pm_generic_suspend_noirq(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->suspend_noirq ? pm->suspend_noirq(dev) : 0;
+ return CALL_PM_OP(dev, suspend_noirq);
}
EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq);
@@ -84,9 +79,7 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq);
*/
int pm_generic_suspend_late(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->suspend_late ? pm->suspend_late(dev) : 0;
+ return CALL_PM_OP(dev, suspend_late);
}
EXPORT_SYMBOL_GPL(pm_generic_suspend_late);
@@ -96,9 +89,7 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend_late);
*/
int pm_generic_suspend(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->suspend ? pm->suspend(dev) : 0;
+ return CALL_PM_OP(dev, suspend);
}
EXPORT_SYMBOL_GPL(pm_generic_suspend);
@@ -108,9 +99,7 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend);
*/
int pm_generic_freeze_noirq(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->freeze_noirq ? pm->freeze_noirq(dev) : 0;
+ return CALL_PM_OP(dev, freeze_noirq);
}
EXPORT_SYMBOL_GPL(pm_generic_freeze_noirq);
@@ -120,9 +109,7 @@ EXPORT_SYMBOL_GPL(pm_generic_freeze_noirq);
*/
int pm_generic_freeze(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->freeze ? pm->freeze(dev) : 0;
+ return CALL_PM_OP(dev, freeze);
}
EXPORT_SYMBOL_GPL(pm_generic_freeze);
@@ -132,9 +119,7 @@ EXPORT_SYMBOL_GPL(pm_generic_freeze);
*/
int pm_generic_poweroff_noirq(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->poweroff_noirq ? pm->poweroff_noirq(dev) : 0;
+ return CALL_PM_OP(dev, poweroff_noirq);
}
EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq);
@@ -144,9 +129,7 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq);
*/
int pm_generic_poweroff_late(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->poweroff_late ? pm->poweroff_late(dev) : 0;
+ return CALL_PM_OP(dev, poweroff_late);
}
EXPORT_SYMBOL_GPL(pm_generic_poweroff_late);
@@ -156,9 +139,7 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff_late);
*/
int pm_generic_poweroff(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->poweroff ? pm->poweroff(dev) : 0;
+ return CALL_PM_OP(dev, poweroff);
}
EXPORT_SYMBOL_GPL(pm_generic_poweroff);
@@ -168,9 +149,7 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff);
*/
int pm_generic_thaw_noirq(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->thaw_noirq ? pm->thaw_noirq(dev) : 0;
+ return CALL_PM_OP(dev, thaw_noirq);
}
EXPORT_SYMBOL_GPL(pm_generic_thaw_noirq);
@@ -180,9 +159,7 @@ EXPORT_SYMBOL_GPL(pm_generic_thaw_noirq);
*/
int pm_generic_thaw(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->thaw ? pm->thaw(dev) : 0;
+ return CALL_PM_OP(dev, thaw);
}
EXPORT_SYMBOL_GPL(pm_generic_thaw);
@@ -192,9 +169,7 @@ EXPORT_SYMBOL_GPL(pm_generic_thaw);
*/
int pm_generic_resume_noirq(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->resume_noirq ? pm->resume_noirq(dev) : 0;
+ return CALL_PM_OP(dev, resume_noirq);
}
EXPORT_SYMBOL_GPL(pm_generic_resume_noirq);
@@ -204,9 +179,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume_noirq);
*/
int pm_generic_resume_early(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->resume_early ? pm->resume_early(dev) : 0;
+ return CALL_PM_OP(dev, resume_early);
}
EXPORT_SYMBOL_GPL(pm_generic_resume_early);
@@ -216,9 +189,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume_early);
*/
int pm_generic_resume(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->resume ? pm->resume(dev) : 0;
+ return CALL_PM_OP(dev, resume);
}
EXPORT_SYMBOL_GPL(pm_generic_resume);
@@ -228,9 +199,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume);
*/
int pm_generic_restore_noirq(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->restore_noirq ? pm->restore_noirq(dev) : 0;
+ return CALL_PM_OP(dev, restore_noirq);
}
EXPORT_SYMBOL_GPL(pm_generic_restore_noirq);
@@ -240,9 +209,7 @@ EXPORT_SYMBOL_GPL(pm_generic_restore_noirq);
*/
int pm_generic_restore_early(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->restore_early ? pm->restore_early(dev) : 0;
+ return CALL_PM_OP(dev, restore_early);
}
EXPORT_SYMBOL_GPL(pm_generic_restore_early);
@@ -252,9 +219,7 @@ EXPORT_SYMBOL_GPL(pm_generic_restore_early);
*/
int pm_generic_restore(struct device *dev)
{
- const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
- return pm && pm->restore ? pm->restore(dev) : 0;
+ return CALL_PM_OP(dev, restore);
}
EXPORT_SYMBOL_GPL(pm_generic_restore);
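
Note: the generic_ops.c change folds over a dozen near-identical bodies into
the CALL_PM_OP() statement-expression macro. Below is a standalone userspace
approximation of its dispatch semantics (the struct definitions and
my_suspend() are stand-ins; statement expressions are a GCC/Clang extension):

#include <stdio.h>
#include <stddef.h>

struct dev_pm_ops { int (*suspend)(void *dev); };
struct driver { const struct dev_pm_ops *pm; };
struct device { struct driver *driver; };

/* Same shape as the kernel macro: NULL-check driver, table, and op. */
#define CALL_PM_OP(dev, op)						\
({									\
	struct device *_dev = (dev);					\
	const struct dev_pm_ops *pm =					\
		_dev->driver ? _dev->driver->pm : NULL;			\
	pm && pm->op ? pm->op(_dev) : 0;				\
})

static int my_suspend(void *dev) { (void)dev; return -1; }

int main(void)
{
	struct dev_pm_ops ops = { .suspend = my_suspend };
	struct driver drv = { .pm = &ops };
	struct device dev = { .driver = &drv };
	struct device bare = { .driver = NULL };

	/* Callback present: forwarded (-1). Driver or op missing: 0. */
	printf("%d %d\n", CALL_PM_OP(&dev, suspend),
	       CALL_PM_OP(&bare, suspend));
	return 0;
}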
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 1de1cd72b616..a0225a83f50c 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -34,6 +34,7 @@
#include <linux/cpufreq.h>
#include <linux/devfreq.h>
#include <linux/timer.h>
+#include <linux/nmi.h>
#include "../base.h"
#include "power.h"
@@ -95,6 +96,8 @@ static const char *pm_verb(int event)
return "restore";
case PM_EVENT_RECOVER:
return "recover";
+ case PM_EVENT_POWEROFF:
+ return "poweroff";
default:
return "(unknown PM event)";
}
@@ -367,6 +370,7 @@ static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state)
case PM_EVENT_FREEZE:
case PM_EVENT_QUIESCE:
return ops->freeze;
+ case PM_EVENT_POWEROFF:
case PM_EVENT_HIBERNATE:
return ops->poweroff;
case PM_EVENT_THAW:
@@ -401,6 +405,7 @@ static pm_callback_t pm_late_early_op(const struct dev_pm_ops *ops,
case PM_EVENT_FREEZE:
case PM_EVENT_QUIESCE:
return ops->freeze_late;
+ case PM_EVENT_POWEROFF:
case PM_EVENT_HIBERNATE:
return ops->poweroff_late;
case PM_EVENT_THAW:
@@ -435,6 +440,7 @@ static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t stat
case PM_EVENT_FREEZE:
case PM_EVENT_QUIESCE:
return ops->freeze_noirq;
+ case PM_EVENT_POWEROFF:
case PM_EVENT_HIBERNATE:
return ops->poweroff_noirq;
case PM_EVENT_THAW:
@@ -515,6 +521,11 @@ struct dpm_watchdog {
#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \
struct dpm_watchdog wd
+static bool __read_mostly dpm_watchdog_all_cpu_backtrace;
+module_param(dpm_watchdog_all_cpu_backtrace, bool, 0644);
+MODULE_PARM_DESC(dpm_watchdog_all_cpu_backtrace,
+ "Backtrace all CPUs on DPM watchdog timeout");
+
/**
* dpm_watchdog_handler - Driver suspend / resume watchdog handler.
* @t: The timer that PM watchdog depends on.
@@ -530,8 +541,12 @@ static void dpm_watchdog_handler(struct timer_list *t)
unsigned int time_left;
if (wd->fatal) {
+ unsigned int this_cpu = smp_processor_id();
+
dev_emerg(wd->dev, "**** DPM device timeout ****\n");
show_stack(wd->tsk, NULL, KERN_EMERG);
+ if (dpm_watchdog_all_cpu_backtrace)
+ trigger_allbutcpu_cpu_backtrace(this_cpu);
panic("%s %s: unrecoverable failure\n",
dev_driver_string(wd->dev), dev_name(wd->dev));
}
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 1b11a3cd4acc..62707738caa4 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -90,7 +90,7 @@ static void update_pm_runtime_accounting(struct device *dev)
/*
* Because ktime_get_mono_fast_ns() is not monotonic during
* timekeeping updates, ensure that 'now' is after the last saved
- * timesptamp.
+ * timestamp.
*/
if (now < last)
return;
@@ -217,7 +217,7 @@ static int dev_memalloc_noio(struct device *dev, void *data)
* resume/suspend callback of any one of its ancestors(or the
* block device itself), the deadlock may be triggered inside the
* memory allocation since it might not complete until the block
- * device becomes active and the involed page I/O finishes. The
+ * device becomes active and the involved page I/O finishes. The
* situation is pointed out first by Alan Stern. Network device
* are involved in iSCSI kind of situation.
*
@@ -1210,7 +1210,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume);
*
* Otherwise, if its runtime PM status is %RPM_ACTIVE and (1) @ign_usage_count
* is set, or (2) @dev is not ignoring children and its active child count is
- * nonero, or (3) the runtime PM usage counter of @dev is not zero, increment
+ * nonzero, or (3) the runtime PM usage counter of @dev is not zero, increment
* the usage counter of @dev and return 1.
*
* Otherwise, return 0 without changing the usage counter.
@@ -1664,9 +1664,12 @@ EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume);
* pm_runtime_forbid - Block runtime PM of a device.
* @dev: Device to handle.
*
- * Increase the device's usage count and clear its power.runtime_auto flag,
- * so that it cannot be suspended at run time until pm_runtime_allow() is called
- * for it.
+ * Resume @dev if already suspended and block runtime suspend of @dev in such
+ * a way that it can be unblocked via the /sys/devices/.../power/control
+ * interface, or otherwise by calling pm_runtime_allow().
+ *
+ * Calling this function many times in a row has the same effect as calling it
+ * once.
*/
void pm_runtime_forbid(struct device *dev)
{
@@ -1687,7 +1690,13 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid);
* pm_runtime_allow - Unblock runtime PM of a device.
* @dev: Device to handle.
*
- * Decrease the device's usage count and set its power.runtime_auto flag.
+ * Unblock runtime suspend of @dev after it has been blocked by
+ * pm_runtime_forbid() (for instance, if it has been blocked via the
+ * /sys/devices/.../power/control interface), check if @dev can be
+ * suspended and suspend it in that case.
+ *
+ * Calling this function many times in a row has the same effect as calling it
+ * once.
*/
void pm_runtime_allow(struct device *dev)
{
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index cd6e559648b2..d8da7195bb00 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -238,10 +238,8 @@ int show_trace_dev_match(char *buf, size_t size)
unsigned int hash = hash_string(DEVSEED, dev_name(dev),
DEVHASH);
if (hash == value) {
- int len = snprintf(buf, size, "%s\n",
+ int len = scnprintf(buf, size, "%s\n",
dev_driver_string(dev));
- if (len > size)
- len = size;
buf += len;
ret += len;
size -= len;
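
Note: the trace.c hunk works because snprintf() returns the length the output
*would* have had, while scnprintf() returns what was actually stored, so the
manual clamp becomes dead code. A standalone userspace sketch of the
difference (the scnprintf() here is a re-implementation for illustration; the
kernel's lives in lib/vsprintf.c):

#include <stdio.h>
#include <stdarg.h>

static int scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int i;

	va_start(args, fmt);
	i = vsnprintf(buf, size, fmt, args);
	va_end(args);

	/* Report what was actually written, never more than size - 1. */
	if (i >= (int)size)
		return size ? (int)size - 1 : 0;
	return i;
}

int main(void)
{
	char buf[8];

	/* snprintf() reports 11 even though only 7 chars + NUL fit... */
	printf("snprintf: %d\n", snprintf(buf, sizeof(buf), "hello world"));
	/* ...scnprintf() reports 7, so "buf += len" stays in bounds. */
	printf("scnprintf: %d\n", scnprintf(buf, sizeof(buf), "hello world"));
	return 0;
}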
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index d1283ff1080b..1e1a0e7eeac5 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -189,17 +189,16 @@ static void wakeup_source_remove(struct wakeup_source *ws)
if (WARN_ON(!ws))
return;
+ /*
+ * After shutting down the timer, wakeup_source_activate() will warn if
+ * the given wakeup source is passed to it.
+ */
+ timer_shutdown_sync(&ws->timer);
+
raw_spin_lock_irqsave(&events_lock, flags);
list_del_rcu(&ws->entry);
raw_spin_unlock_irqrestore(&events_lock, flags);
synchronize_srcu(&wakeup_srcu);
-
- timer_delete_sync(&ws->timer);
- /*
- * Clear timer.function to make wakeup_source_not_registered() treat
- * this wakeup source as not registered.
- */
- ws->timer.function = NULL;
}
/**
@@ -506,14 +505,14 @@ int device_set_wakeup_enable(struct device *dev, bool enable)
EXPORT_SYMBOL_GPL(device_set_wakeup_enable);
/**
- * wakeup_source_not_registered - validate the given wakeup source.
+ * wakeup_source_not_usable - validate the given wakeup source.
* @ws: Wakeup source to be validated.
*/
-static bool wakeup_source_not_registered(struct wakeup_source *ws)
+static bool wakeup_source_not_usable(struct wakeup_source *ws)
{
/*
- * Use timer struct to check if the given source is initialized
- * by wakeup_source_add.
+ * Use the timer struct to check if the given wakeup source has been
+ * initialized by wakeup_source_add() and it is not going away.
*/
return ws->timer.function != pm_wakeup_timer_fn;
}
@@ -558,8 +557,7 @@ static void wakeup_source_activate(struct wakeup_source *ws)
{
unsigned int cec;
- if (WARN_ONCE(wakeup_source_not_registered(ws),
- "unregistered wakeup source\n"))
+ if (WARN_ONCE(wakeup_source_not_usable(ws), "unusable wakeup source\n"))
return;
ws->active = true;
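
Note: the wakeup.c reordering relies on timer_shutdown_sync() both waiting
for a running handler and preventing re-arming; it does so by clearing the
timer's function pointer, which is exactly the field the renamed
wakeup_source_not_usable() test inspects. Kernel-style sketch of the
invariant (not part of this diff):

	timer_shutdown_sync(&ws->timer);  /* ws->timer.function becomes NULL */
	/* From here on, wakeup_source_activate() warns and bails out: */
	WARN_ON(!wakeup_source_not_usable(ws));  /* NULL != pm_wakeup_timer_fn */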
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 083d8369a591..e73a66785d69 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -395,7 +395,7 @@ static unsigned int check_freqs(struct cpufreq_policy *policy,
cur_freq = extract_freq(policy, get_cur_val(mask, data));
if (cur_freq == freq)
return 1;
- udelay(10);
+ usleep_range(10, 15);
}
return 0;
}
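
Note: the acpi-cpufreq change trades a busy-wait for a sleep. udelay() spins
the CPU for the whole period, while usleep_range() sleeps on an hrtimer and
the min/max window lets the timer subsystem coalesce the wakeup with other
pending timers; it is only legal because check_freqs() runs in sleepable
context. Illustrative contrast:

	udelay(10);		/* spin: CPU stays busy for the full 10 us */
	usleep_range(10, 15);	/* sleep: wake anywhere in 10-15 us, batchable */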
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index b44f0f7a5ba1..c45bc98721d2 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -65,13 +65,13 @@ static const char * const amd_pstate_mode_string[] = {
[AMD_PSTATE_PASSIVE] = "passive",
[AMD_PSTATE_ACTIVE] = "active",
[AMD_PSTATE_GUIDED] = "guided",
- NULL,
};
+static_assert(ARRAY_SIZE(amd_pstate_mode_string) == AMD_PSTATE_MAX);
const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode)
{
- if (mode < 0 || mode >= AMD_PSTATE_MAX)
- return NULL;
+ if (mode < AMD_PSTATE_UNDEFINED || mode >= AMD_PSTATE_MAX)
+ mode = AMD_PSTATE_UNDEFINED;
return amd_pstate_mode_string[mode];
}
EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string);
@@ -110,6 +110,7 @@ enum energy_perf_value_index {
EPP_INDEX_BALANCE_PERFORMANCE,
EPP_INDEX_BALANCE_POWERSAVE,
EPP_INDEX_POWERSAVE,
+ EPP_INDEX_MAX,
};
static const char * const energy_perf_strings[] = {
@@ -118,8 +119,8 @@ static const char * const energy_perf_strings[] = {
[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
[EPP_INDEX_POWERSAVE] = "power",
- NULL
};
+static_assert(ARRAY_SIZE(energy_perf_strings) == EPP_INDEX_MAX);
static unsigned int epp_values[] = {
[EPP_INDEX_DEFAULT] = 0,
@@ -127,7 +128,8 @@ static unsigned int epp_values[] = {
[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
- };
+};
+static_assert(ARRAY_SIZE(epp_values) == EPP_INDEX_MAX);
typedef int (*cppc_mode_transition_fn)(int);
@@ -183,7 +185,7 @@ static inline int get_mode_idx_from_str(const char *str, size_t size)
{
int i;
- for (i=0; i < AMD_PSTATE_MAX; i++) {
+ for (i = 0; i < AMD_PSTATE_MAX; i++) {
if (!strncmp(str, amd_pstate_mode_string[i], size))
return i;
}
@@ -1137,16 +1139,15 @@ static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
static ssize_t show_energy_performance_available_preferences(
struct cpufreq_policy *policy, char *buf)
{
- int i = 0;
- int offset = 0;
+ int offset = 0, i;
struct amd_cpudata *cpudata = policy->driver_data;
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
return sysfs_emit_at(buf, offset, "%s\n",
energy_perf_strings[EPP_INDEX_PERFORMANCE]);
- while (energy_perf_strings[i] != NULL)
- offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
+ for (i = 0; i < ARRAY_SIZE(energy_perf_strings); i++)
+ offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i]);
offset += sysfs_emit_at(buf, offset, "\n");
@@ -1157,15 +1158,10 @@ static ssize_t store_energy_performance_preference(
struct cpufreq_policy *policy, const char *buf, size_t count)
{
struct amd_cpudata *cpudata = policy->driver_data;
- char str_preference[21];
ssize_t ret;
u8 epp;
- ret = sscanf(buf, "%20s", str_preference);
- if (ret != 1)
- return -EINVAL;
-
- ret = match_string(energy_perf_strings, -1, str_preference);
+ ret = sysfs_match_string(energy_perf_strings, buf);
if (ret < 0)
return -EINVAL;
@@ -1282,7 +1278,7 @@ static int amd_pstate_change_mode_without_dvr_change(int mode)
if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
return 0;
- for_each_present_cpu(cpu) {
+ for_each_online_cpu(cpu) {
cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
}
@@ -1353,9 +1349,8 @@ int amd_pstate_update_status(const char *buf, size_t size)
return -EINVAL;
mode_idx = get_mode_idx_from_str(buf, size);
-
- if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
- return -EINVAL;
+ if (mode_idx < 0)
+ return mode_idx;
if (mode_state_machine[cppc_state][mode_idx]) {
guard(mutex)(&amd_pstate_driver_lock);
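
Note: the amd-pstate hunks replace NULL sentinel entries with static_assert()
sizing against the enum, and sysfs_match_string() (which iterates
ARRAY_SIZE() entries and tolerates the trailing newline from sysfs writes)
replaces the sscanf()-plus-match_string() pair. A standalone userspace
illustration of the sentinel-to-static_assert pattern (enum and strings are
stand-ins):

#include <assert.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum epp_index { EPP_DEFAULT, EPP_PERFORMANCE, EPP_POWERSAVE, EPP_MAX };

static const char * const epp_strings[] = {
	[EPP_DEFAULT]     = "default",
	[EPP_PERFORMANCE] = "performance",
	[EPP_POWERSAVE]   = "power",
};
/* Compile-time guarantee that the table covers the whole enum. */
static_assert(ARRAY_SIZE(epp_strings) == EPP_MAX, "table/enum mismatch");

int main(void)
{
	/* No NULL terminator needed: iterate by size, not by sentinel. */
	for (size_t i = 0; i < ARRAY_SIZE(epp_strings); i++)
		printf("%s ", epp_strings[i]);
	printf("\n");
	return 0;
}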
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index e23d9abea135..9eac77c4f294 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -142,16 +142,15 @@ static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy)
init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs);
- if (ret) {
- pr_warn("%s: failed to read perf counters for cpu:%d: %d\n",
- __func__, cpu, ret);
- /*
- * Don't abort if the CPU was offline while the driver
- * was getting registered.
- */
- if (cpu_online(cpu))
- return;
+ /*
+ * Don't abort as the CPU was offline while the driver was
+ * getting registered.
+ */
+ if (ret && cpu_online(cpu)) {
+ pr_debug("%s: failed to read perf counters for cpu:%d: %d\n",
+ __func__, cpu, ret);
+ return;
}
}
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index cd1816a12bb9..dc11b62399ad 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -87,6 +87,7 @@ static const struct of_device_id allowlist[] __initconst = {
{ .compatible = "st-ericsson,u9540", },
{ .compatible = "starfive,jh7110", },
+ { .compatible = "starfive,jh7110s", },
{ .compatible = "ti,omap2", },
{ .compatible = "ti,omap4", },
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index fedad1081973..fbbbe501cf2d 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -145,6 +145,8 @@ static unsigned int nforce2_fsb_read(int bootfsb)
pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
fsb /= 1000000;
+ pci_dev_put(nforce2_sub5);
+
/* Check if PLL register is already set */
pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
@@ -426,6 +428,7 @@ static int __init nforce2_init(void)
static void __exit nforce2_exit(void)
{
cpufreq_unregister_driver(&nforce2_driver);
+ pci_dev_put(nforce2_dev);
}
module_init(nforce2_init);
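
Note: both cpufreq-nforce2.c hunks fix the same leak class:
pci_get_subsys()/pci_get_device() return a reference-counted struct pci_dev,
and each successful lookup must eventually be balanced with pci_dev_put().
Sketch of the rule (vendor, device, reg, and val are placeholders):

	struct pci_dev *pdev = pci_get_device(vendor, device, NULL);

	if (pdev) {
		pci_read_config_dword(pdev, reg, &val);
		pci_dev_put(pdev);	/* drop the reference taken by the lookup */
	}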
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 852e024facc3..4472bb1ec83c 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1421,9 +1421,12 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
* If there is a problem with its frequency table, take it
* offline and drop it.
*/
- ret = cpufreq_table_validate_and_sort(policy);
- if (ret)
- goto out_offline_policy;
+ if (policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_ASCENDING &&
+ policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_DESCENDING) {
+ ret = cpufreq_table_validate_and_sort(policy);
+ if (ret)
+ goto out_offline_policy;
+ }
/* related_cpus should at least include policy->cpus. */
cpumask_copy(policy->related_cpus, policy->cpus);
@@ -2550,7 +2553,7 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor)
for_each_inactive_policy(policy) {
if (!strcmp(policy->last_governor, governor->name)) {
policy->governor = NULL;
- strcpy(policy->last_governor, "\0");
+ policy->last_governor[0] = '\0';
}
}
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 492a10f1bdbf..ec4abe374573 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -575,13 +575,18 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
int scaling = cpu->pstate.scaling;
int freq;
- pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
- pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
- pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
+ pr_debug("CPU%d: PERF_CTL max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
+ pr_debug("CPU%d: PERF_CTL turbo = %d\n", cpu->cpu, perf_ctl_turbo);
+ pr_debug("CPU%d: PERF_CTL scaling = %d\n", cpu->cpu, perf_ctl_scaling);
pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
+ if (scaling == perf_ctl_scaling)
+ return;
+
+ hwp_is_hybrid = true;
+
cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_pstate * scaling,
perf_ctl_scaling);
cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
@@ -909,6 +914,11 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
[HWP_CPUFREQ_ATTR_COUNT] = NULL,
};
+static u8 hybrid_get_cpu_type(unsigned int cpu)
+{
+ return cpu_data(cpu).topo.intel_type;
+}
+
static bool no_cas __ro_after_init;
static struct cpudata *hybrid_max_perf_cpu __read_mostly;
@@ -925,11 +935,8 @@ static int hybrid_active_power(struct device *dev, unsigned long *power,
unsigned long *freq)
{
/*
- * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100%
- * of the maximum capacity such that two CPUs of the same type will be
- * regarded as equally attractive if the utilization of each of them
- * falls into the same bin, which should prevent tasks from being
- * migrated between them too often.
+ * Create four "states" corresponding to 40%, 60%, 80%, and 100% of the
+ * full capacity.
*
* For this purpose, return the "frequency" of 2 for the first
* performance level and otherwise leave the value set by the caller.
@@ -943,38 +950,40 @@ static int hybrid_active_power(struct device *dev, unsigned long *power,
return 0;
}
+static bool hybrid_has_l3(unsigned int cpu)
+{
+ struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(cpu);
+ unsigned int i;
+
+ if (!cacheinfo)
+ return false;
+
+ for (i = 0; i < cacheinfo->num_leaves; i++) {
+ if (cacheinfo->info_list[i].level == 3)
+ return true;
+ }
+
+ return false;
+}
+
static int hybrid_get_cost(struct device *dev, unsigned long freq,
unsigned long *cost)
{
- struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate;
- struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(dev->id);
-
+ /* Facilitate load balancing between CPUs of the same type. */
+ *cost = freq;
/*
- * The smaller the perf-to-frequency scaling factor, the larger the IPC
- * ratio between the given CPU and the least capable CPU in the system.
- * Regard that IPC ratio as the primary cost component and assume that
- * the scaling factors for different CPU types will differ by at least
- * 5% and they will not be above INTEL_PSTATE_CORE_SCALING.
+ * Adjust the cost depending on CPU type.
*
- * Add the freq value to the cost, so that the cost of running on CPUs
- * of the same type in different "utilization bins" is different.
- */
- *cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq;
- /*
- * Increase the cost slightly for CPUs able to access L3 to avoid
- * touching it in case some other CPUs of the same type can do the work
- * without it.
+ * The idea is to start loading up LPE-cores before E-cores and start
+ * to populate E-cores when LPE-cores are utilized above 60% of the
+ * capacity. Similarly, P-cores start to be populated when E-cores are
+ * utilized above 60% of the capacity.
*/
- if (cacheinfo) {
- unsigned int i;
-
- /* Check if L3 cache is there. */
- for (i = 0; i < cacheinfo->num_leaves; i++) {
- if (cacheinfo->info_list[i].level == 3) {
- *cost += 2;
- break;
- }
- }
+ if (hybrid_get_cpu_type(dev->id) == INTEL_CPU_TYPE_ATOM) {
+ if (hybrid_has_l3(dev->id)) /* E-core */
+ *cost += 1;
+ } else { /* P-core */
+ *cost += 2;
}
return 0;
@@ -1037,9 +1046,9 @@ static void hybrid_set_cpu_capacity(struct cpudata *cpu)
topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu));
- pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
- cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
- cpu->pstate.max_pstate_physical);
+ pr_debug("CPU%d: capacity perf = %u, base perf = %u, sys max perf = %u\n",
+ cpu->cpu, cpu->capacity_perf, cpu->pstate.max_pstate_physical,
+ hybrid_max_perf_cpu->capacity_perf);
}
static void hybrid_clear_cpu_capacity(unsigned int cpunum)
@@ -1384,7 +1393,8 @@ static void set_power_ctl_ee_state(bool input)
{
u64 power_ctl;
- mutex_lock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
+
rdmsrq(MSR_IA32_POWER_CTL, power_ctl);
if (input) {
power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE);
@@ -1394,7 +1404,6 @@ static void set_power_ctl_ee_state(bool input)
power_ctl_ee_state = POWER_CTL_EE_DISABLE;
}
wrmsrq(MSR_IA32_POWER_CTL, power_ctl);
- mutex_unlock(&intel_pstate_driver_lock);
}
static void intel_pstate_hwp_enable(struct cpudata *cpudata);
@@ -1516,13 +1525,9 @@ static int intel_pstate_update_status(const char *buf, size_t size);
static ssize_t show_status(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- ssize_t ret;
-
- mutex_lock(&intel_pstate_driver_lock);
- ret = intel_pstate_show_status(buf);
- mutex_unlock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
- return ret;
+ return intel_pstate_show_status(buf);
}
static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
@@ -1531,11 +1536,13 @@ static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
char *p = memchr(buf, '\n', count);
int ret;
- mutex_lock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
+
ret = intel_pstate_update_status(buf, p ? p - buf : count);
- mutex_unlock(&intel_pstate_driver_lock);
+ if (ret < 0)
+ return ret;
- return ret < 0 ? ret : count;
+ return count;
}
static ssize_t show_turbo_pct(struct kobject *kobj,
@@ -1545,12 +1552,10 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
int total, no_turbo, turbo_pct;
uint32_t turbo_fp;
- mutex_lock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
- if (!intel_pstate_driver) {
- mutex_unlock(&intel_pstate_driver_lock);
+ if (!intel_pstate_driver)
return -EAGAIN;
- }
cpu = all_cpu_data[0];
@@ -1559,8 +1564,6 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
turbo_fp = div_fp(no_turbo, total);
turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
- mutex_unlock(&intel_pstate_driver_lock);
-
return sprintf(buf, "%u\n", turbo_pct);
}
@@ -1570,38 +1573,26 @@ static ssize_t show_num_pstates(struct kobject *kobj,
struct cpudata *cpu;
int total;
- mutex_lock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
- if (!intel_pstate_driver) {
- mutex_unlock(&intel_pstate_driver_lock);
+ if (!intel_pstate_driver)
return -EAGAIN;
- }
cpu = all_cpu_data[0];
total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
- mutex_unlock(&intel_pstate_driver_lock);
-
return sprintf(buf, "%u\n", total);
}
static ssize_t show_no_turbo(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- ssize_t ret;
+ guard(mutex)(&intel_pstate_driver_lock);
- mutex_lock(&intel_pstate_driver_lock);
-
- if (!intel_pstate_driver) {
- mutex_unlock(&intel_pstate_driver_lock);
+ if (!intel_pstate_driver)
return -EAGAIN;
- }
-
- ret = sprintf(buf, "%u\n", global.no_turbo);
-
- mutex_unlock(&intel_pstate_driver_lock);
- return ret;
+ return sprintf(buf, "%u\n", global.no_turbo);
}
static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
@@ -1613,29 +1604,25 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
if (sscanf(buf, "%u", &input) != 1)
return -EINVAL;
- mutex_lock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
- if (!intel_pstate_driver) {
- count = -EAGAIN;
- goto unlock_driver;
- }
+ if (!intel_pstate_driver)
+ return -EAGAIN;
no_turbo = !!clamp_t(int, input, 0, 1);
WRITE_ONCE(global.turbo_disabled, turbo_is_disabled());
if (global.turbo_disabled && !no_turbo) {
pr_notice("Turbo disabled by BIOS or unavailable on processor\n");
- count = -EPERM;
if (global.no_turbo)
- goto unlock_driver;
- else
- no_turbo = 1;
- }
+ return -EPERM;
- if (no_turbo == global.no_turbo) {
- goto unlock_driver;
+ no_turbo = 1;
}
+ if (no_turbo == global.no_turbo)
+ return count;
+
WRITE_ONCE(global.no_turbo, no_turbo);
mutex_lock(&intel_pstate_limits_lock);
@@ -1654,9 +1641,6 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
intel_pstate_update_limits_for_all();
arch_set_max_freq_ratio(no_turbo);
-unlock_driver:
- mutex_unlock(&intel_pstate_driver_lock);
-
return count;
}
@@ -1706,12 +1690,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
if (ret != 1)
return -EINVAL;
- mutex_lock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
- if (!intel_pstate_driver) {
- mutex_unlock(&intel_pstate_driver_lock);
+ if (!intel_pstate_driver)
return -EAGAIN;
- }
mutex_lock(&intel_pstate_limits_lock);
@@ -1724,8 +1706,6 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
else
update_qos_requests(FREQ_QOS_MAX);
- mutex_unlock(&intel_pstate_driver_lock);
-
return count;
}
@@ -1739,12 +1719,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
if (ret != 1)
return -EINVAL;
- mutex_lock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
- if (!intel_pstate_driver) {
- mutex_unlock(&intel_pstate_driver_lock);
+ if (!intel_pstate_driver)
return -EAGAIN;
- }
mutex_lock(&intel_pstate_limits_lock);
@@ -1758,8 +1736,6 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
else
update_qos_requests(FREQ_QOS_MIN);
- mutex_unlock(&intel_pstate_driver_lock);
-
return count;
}
@@ -1780,10 +1756,10 @@ static ssize_t store_hwp_dynamic_boost(struct kobject *a,
if (ret)
return ret;
- mutex_lock(&intel_pstate_driver_lock);
+ guard(mutex)(&intel_pstate_driver_lock);
+
hwp_boost = !!input;
intel_pstate_update_policies();
- mutex_unlock(&intel_pstate_driver_lock);
return count;
}
@@ -2072,6 +2048,18 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
intel_pstate_update_epp_defaults(cpudata);
}
+static u64 get_perf_ctl_val(int pstate)
+{
+ u64 val;
+
+ val = (u64)pstate << 8;
+ if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled) &&
+ cpu_feature_enabled(X86_FEATURE_IDA))
+ val |= (u64)1 << 32;
+
+ return val;
+}
+
static int atom_get_min_pstate(int not_used)
{
u64 value;
@@ -2098,15 +2086,10 @@ static int atom_get_turbo_pstate(int not_used)
static u64 atom_get_val(struct cpudata *cpudata, int pstate)
{
- u64 val;
+ u64 val = get_perf_ctl_val(pstate);
int32_t vid_fp;
u32 vid;
- val = (u64)pstate << 8;
- if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled) &&
- cpu_feature_enabled(X86_FEATURE_IDA))
- val |= (u64)1 << 32;
-
vid_fp = cpudata->vid.min + mul_fp(
int_tofp(pstate - cpudata->pstate.min_pstate),
cpudata->vid.ratio);
@@ -2266,14 +2249,7 @@ static int core_get_turbo_pstate(int cpu)
static u64 core_get_val(struct cpudata *cpudata, int pstate)
{
- u64 val;
-
- val = (u64)pstate << 8;
- if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled) &&
- cpu_feature_enabled(X86_FEATURE_IDA))
- val |= (u64)1 << 32;
-
- return val;
+ return get_perf_ctl_val(pstate);
}
static int knl_get_aperf_mperf_shift(void)
@@ -2297,18 +2273,14 @@ static int knl_get_turbo_pstate(int cpu)
static int hwp_get_cpu_scaling(int cpu)
{
if (hybrid_scaling_factor) {
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- u8 cpu_type = c->topo.intel_type;
-
/*
* Return the hybrid scaling factor for P-cores and use the
* default core scaling for E-cores.
*/
- if (cpu_type == INTEL_CPU_TYPE_CORE)
+ if (hybrid_get_cpu_type(cpu) == INTEL_CPU_TYPE_CORE)
return hybrid_scaling_factor;
- if (cpu_type == INTEL_CPU_TYPE_ATOM)
- return core_get_scaling();
+ return core_get_scaling();
}
/* Use core scaling on non-hybrid systems. */
@@ -2343,11 +2315,10 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
- int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu);
int perf_ctl_scaling = pstate_funcs.get_scaling();
+ cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(cpu->cpu);
cpu->pstate.min_pstate = pstate_funcs.get_min(cpu->cpu);
- cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
cpu->pstate.perf_ctl_scaling = perf_ctl_scaling;
if (hwp_active && !hwp_mode_bdw) {
@@ -2355,10 +2326,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
if (pstate_funcs.get_cpu_scaling) {
cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
- if (cpu->pstate.scaling != perf_ctl_scaling) {
- intel_pstate_hybrid_hwp_adjust(cpu);
- hwp_is_hybrid = true;
- }
+ intel_pstate_hybrid_hwp_adjust(cpu);
} else {
cpu->pstate.scaling = perf_ctl_scaling;
}
@@ -2760,6 +2728,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs),
X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs),
X86_MATCH(INTEL_ATOM_DARKMONT_X, core_funcs),
+ X86_MATCH(INTEL_DIAMONDRAPIDS_X, core_funcs),
{}
};
#endif
@@ -3912,9 +3881,9 @@ hwp_cpu_matched:
}
- mutex_lock(&intel_pstate_driver_lock);
- rc = intel_pstate_register_driver(default_driver);
- mutex_unlock(&intel_pstate_driver_lock);
+ scoped_guard(mutex, &intel_pstate_driver_lock) {
+ rc = intel_pstate_register_driver(default_driver);
+ }
if (rc) {
intel_pstate_sysfs_remove();
return rc;
diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c
index 765a5bb81829..81e16b5a0245 100644
--- a/drivers/cpufreq/qcom-cpufreq-nvmem.c
+++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c
@@ -256,13 +256,22 @@ len_error:
return ret;
}
+static const struct of_device_id qcom_cpufreq_ipq806x_match_list[] __maybe_unused = {
+ { .compatible = "qcom,ipq8062", .data = (const void *)QCOM_ID_IPQ8062 },
+ { .compatible = "qcom,ipq8064", .data = (const void *)QCOM_ID_IPQ8064 },
+ { .compatible = "qcom,ipq8065", .data = (const void *)QCOM_ID_IPQ8065 },
+ { .compatible = "qcom,ipq8066", .data = (const void *)QCOM_ID_IPQ8066 },
+ { .compatible = "qcom,ipq8068", .data = (const void *)QCOM_ID_IPQ8068 },
+ { .compatible = "qcom,ipq8069", .data = (const void *)QCOM_ID_IPQ8069 },
+};
+
static int qcom_cpufreq_ipq8064_name_version(struct device *cpu_dev,
struct nvmem_cell *speedbin_nvmem,
char **pvs_name,
struct qcom_cpufreq_drv *drv)
{
+ int msm_id = -1, ret = 0;
int speed = 0, pvs = 0;
- int msm_id, ret = 0;
u8 *speedbin;
size_t len;
@@ -279,8 +288,30 @@ static int qcom_cpufreq_ipq8064_name_version(struct device *cpu_dev,
get_krait_bin_format_a(cpu_dev, &speed, &pvs, speedbin);
ret = qcom_smem_get_soc_id(&msm_id);
- if (ret)
+ if (ret == -ENODEV) {
+ const struct of_device_id *match;
+ struct device_node *root;
+
+ root = of_find_node_by_path("/");
+ if (!root) {
+ ret = -ENODEV;
+ goto exit;
+ }
+
+ /* Fallback to compatible match with no SMEM initialized */
+ match = of_match_node(qcom_cpufreq_ipq806x_match_list, root);
+ of_node_put(root);
+ if (!match) {
+ ret = -ENODEV;
+ goto exit;
+ }
+
+ /* We found a matching device, get the msm_id from the data entry */
+ msm_id = (int)(uintptr_t)match->data;
+ ret = 0;
+ } else if (ret) {
goto exit;
+ }
switch (msm_id) {
case QCOM_ID_IPQ8062:
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 4215621deb3f..ba8a1c96427a 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -518,7 +518,7 @@ static int s5pv210_cpu_init(struct cpufreq_policy *policy)
if (policy->cpu != 0) {
ret = -EINVAL;
- goto out_dmc1;
+ goto out;
}
/*
@@ -530,7 +530,7 @@ static int s5pv210_cpu_init(struct cpufreq_policy *policy)
if ((mem_type != LPDDR) && (mem_type != LPDDR2)) {
pr_err("CPUFreq doesn't support this memory type\n");
ret = -EINVAL;
- goto out_dmc1;
+ goto out;
}
/* Find current refresh counter and frequency each DMC */
@@ -544,6 +544,8 @@ static int s5pv210_cpu_init(struct cpufreq_policy *policy)
cpufreq_generic_init(policy, s5pv210_freq_table, 40000);
return 0;
+out:
+ clk_put(dmc1_clk);
out_dmc1:
clk_put(dmc0_clk);
out_dmc0:
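
Note: the s5pv210 fix is the classic goto-ladder rule: every label undoes
exactly the acquisitions that succeeded before the failure, in reverse order.
The two retargeted gotos fire after dmc1_clk was obtained, so they need the
new "out" label that releases it first. Condensed kernel-style sketch (not
the literal driver code; bad_mem_type stands in for the LPDDR check):

	dmc1_clk = clk_get(NULL, "sclk_dmc1");
	if (IS_ERR(dmc1_clk)) {
		ret = PTR_ERR(dmc1_clk);
		goto out_dmc1;		/* only dmc0_clk held */
	}

	if (policy->cpu != 0 || bad_mem_type) {
		ret = -EINVAL;
		goto out;		/* both clocks held */
	}

	return 0;

out:
	clk_put(dmc1_clk);
out_dmc1:
	clk_put(dmc0_clk);
	return ret;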
diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c
index 136ab102f636..34ed943c5f34 100644
--- a/drivers/cpufreq/tegra186-cpufreq.c
+++ b/drivers/cpufreq/tegra186-cpufreq.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/units.h>
#include <soc/tegra/bpmp.h>
#include <soc/tegra/bpmp-abi.h>
@@ -58,7 +59,7 @@ static const struct tegra186_cpufreq_cpu tegra186_cpus[] = {
};
struct tegra186_cpufreq_cluster {
- struct cpufreq_frequency_table *table;
+ struct cpufreq_frequency_table *bpmp_lut;
u32 ref_clk_khz;
u32 div;
};
@@ -66,16 +67,119 @@ struct tegra186_cpufreq_cluster {
struct tegra186_cpufreq_data {
void __iomem *regs;
const struct tegra186_cpufreq_cpu *cpus;
+ bool icc_dram_bw_scaling;
struct tegra186_cpufreq_cluster clusters[];
};
+static int tegra_cpufreq_set_bw(struct cpufreq_policy *policy, unsigned long freq_khz)
+{
+ struct tegra186_cpufreq_data *data = cpufreq_get_driver_data();
+ struct device *dev;
+ int ret;
+
+ dev = get_cpu_device(policy->cpu);
+ if (!dev)
+ return -ENODEV;
+
+ struct dev_pm_opp *opp __free(put_opp) =
+ dev_pm_opp_find_freq_exact(dev, freq_khz * HZ_PER_KHZ, true);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+
+ ret = dev_pm_opp_set_opp(dev, opp);
+ if (ret)
+ data->icc_dram_bw_scaling = false;
+
+ return ret;
+}
+
+static int tegra_cpufreq_init_cpufreq_table(struct cpufreq_policy *policy,
+ struct cpufreq_frequency_table *bpmp_lut,
+ struct cpufreq_frequency_table **opp_table)
+{
+ struct tegra186_cpufreq_data *data = cpufreq_get_driver_data();
+ struct cpufreq_frequency_table *freq_table = NULL;
+ struct cpufreq_frequency_table *pos;
+ struct device *cpu_dev;
+ unsigned long rate;
+ int ret, max_opps;
+ int j = 0;
+
+ cpu_dev = get_cpu_device(policy->cpu);
+ if (!cpu_dev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, policy->cpu);
+ return -ENODEV;
+ }
+
+ /* Initialize OPP table mentioned in operating-points-v2 property in DT */
+ ret = dev_pm_opp_of_add_table_indexed(cpu_dev, 0);
+ if (ret) {
+ dev_err(cpu_dev, "Invalid or empty opp table in device tree\n");
+ data->icc_dram_bw_scaling = false;
+ return ret;
+ }
+
+ max_opps = dev_pm_opp_get_opp_count(cpu_dev);
+ if (max_opps <= 0) {
+ dev_err(cpu_dev, "Failed to add OPPs\n");
+ return max_opps;
+ }
+
+ /* Disable all opps and cross-validate against LUT later */
+ for (rate = 0; ; rate++) {
+ struct dev_pm_opp *opp __free(put_opp) =
+ dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
+ if (IS_ERR(opp))
+ break;
+
+ dev_pm_opp_disable(cpu_dev, rate);
+ }
+
+ freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_KERNEL);
+ if (!freq_table)
+ return -ENOMEM;
+
+ /*
+ * Cross check the frequencies from BPMP-FW LUT against the OPP's present in DT.
+ * Enable only those DT OPP's which are present in LUT also.
+ */
+ cpufreq_for_each_valid_entry(pos, bpmp_lut) {
+ struct dev_pm_opp *opp __free(put_opp) =
+ dev_pm_opp_find_freq_exact(cpu_dev, pos->frequency * HZ_PER_KHZ, false);
+ if (IS_ERR(opp))
+ continue;
+
+ ret = dev_pm_opp_enable(cpu_dev, pos->frequency * HZ_PER_KHZ);
+ if (ret < 0)
+ return ret;
+
+ freq_table[j].driver_data = pos->driver_data;
+ freq_table[j].frequency = pos->frequency;
+ j++;
+ }
+
+ freq_table[j].driver_data = pos->driver_data;
+ freq_table[j].frequency = CPUFREQ_TABLE_END;
+
+ *opp_table = &freq_table[0];
+
+ dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus);
+
+ /* Prime interconnect data */
+ tegra_cpufreq_set_bw(policy, freq_table[j - 1].frequency);
+
+ return ret;
+}
+
static int tegra186_cpufreq_init(struct cpufreq_policy *policy)
{
struct tegra186_cpufreq_data *data = cpufreq_get_driver_data();
unsigned int cluster = data->cpus[policy->cpu].bpmp_cluster_id;
+ struct cpufreq_frequency_table *freq_table;
+ struct cpufreq_frequency_table *bpmp_lut;
u32 cpu;
+ int ret;
- policy->freq_table = data->clusters[cluster].table;
policy->cpuinfo.transition_latency = 300 * 1000;
policy->driver_data = NULL;
@@ -85,6 +189,20 @@ static int tegra186_cpufreq_init(struct cpufreq_policy *policy)
cpumask_set_cpu(cpu, policy->cpus);
}
+ bpmp_lut = data->clusters[cluster].bpmp_lut;
+
+ if (data->icc_dram_bw_scaling) {
+ ret = tegra_cpufreq_init_cpufreq_table(policy, bpmp_lut, &freq_table);
+ if (!ret) {
+ policy->freq_table = freq_table;
+ return 0;
+ }
+ }
+
+ data->icc_dram_bw_scaling = false;
+ policy->freq_table = bpmp_lut;
+ pr_info("OPP tables missing from DT, EMC frequency scaling disabled\n");
+
return 0;
}
@@ -102,6 +220,10 @@ static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy,
writel(edvd_val, data->regs + edvd_offset);
}
+ if (data->icc_dram_bw_scaling)
+ tegra_cpufreq_set_bw(policy, tbl->frequency);
+
+
return 0;
}
@@ -134,7 +256,7 @@ static struct cpufreq_driver tegra186_cpufreq_driver = {
.init = tegra186_cpufreq_init,
};
-static struct cpufreq_frequency_table *init_vhint_table(
+static struct cpufreq_frequency_table *tegra_cpufreq_bpmp_read_lut(
struct platform_device *pdev, struct tegra_bpmp *bpmp,
struct tegra186_cpufreq_cluster *cluster, unsigned int cluster_id,
int *num_rates)
@@ -229,6 +351,7 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev)
{
struct tegra186_cpufreq_data *data;
struct tegra_bpmp *bpmp;
+ struct device *cpu_dev;
unsigned int i = 0, err, edvd_offset;
int num_rates = 0;
u32 edvd_val, cpu;
@@ -254,9 +377,9 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev)
for (i = 0; i < TEGRA186_NUM_CLUSTERS; i++) {
struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
- cluster->table = init_vhint_table(pdev, bpmp, cluster, i, &num_rates);
- if (IS_ERR(cluster->table)) {
- err = PTR_ERR(cluster->table);
+ cluster->bpmp_lut = tegra_cpufreq_bpmp_read_lut(pdev, bpmp, cluster, i, &num_rates);
+ if (IS_ERR(cluster->bpmp_lut)) {
+ err = PTR_ERR(cluster->bpmp_lut);
goto put_bpmp;
} else if (!num_rates) {
err = -EINVAL;
@@ -265,7 +388,7 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev)
for (cpu = 0; cpu < ARRAY_SIZE(tegra186_cpus); cpu++) {
if (data->cpus[cpu].bpmp_cluster_id == i) {
- edvd_val = cluster->table[num_rates - 1].driver_data;
+ edvd_val = cluster->bpmp_lut[num_rates - 1].driver_data;
edvd_offset = data->cpus[cpu].edvd_offset;
writel(edvd_val, data->regs + edvd_offset);
}
@@ -274,6 +397,19 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev)
tegra186_cpufreq_driver.driver_data = data;
+ /* Check for optional OPPv2 and interconnect paths on CPU0 to enable ICC scaling */
+ cpu_dev = get_cpu_device(0);
+ if (!cpu_dev) {
+ err = -EPROBE_DEFER;
+ goto put_bpmp;
+ }
+
+ if (dev_pm_opp_of_get_opp_desc_node(cpu_dev)) {
+ err = dev_pm_opp_of_find_icc_paths(cpu_dev, NULL);
+ if (!err)
+ data->icc_dram_bw_scaling = true;
+ }
+
err = cpufreq_register_driver(&tegra186_cpufreq_driver);
put_bpmp:
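
Note: the tegra186-cpufreq.c additions lean on scope-based OPP cleanup: a
struct dev_pm_opp pointer declared with __free(put_opp) is released
automatically when it goes out of scope, including on IS_ERR() early returns
(the put_opp cleanup helper in the OPP headers is expected to skip
error/NULL values). That is why none of the new error paths call
dev_pm_opp_put() by hand:

	struct dev_pm_opp *opp __free(put_opp) =
		dev_pm_opp_find_freq_exact(dev, freq_khz * HZ_PER_KHZ, true);
	if (IS_ERR(opp))
		return PTR_ERR(opp);	/* no manual put needed on any path */

	return dev_pm_opp_set_opp(dev, opp);	/* opp dropped on scope exit */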
diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index 9b4f516f313e..695599e1001f 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -750,7 +750,8 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev)
if (IS_ERR(bpmp))
return PTR_ERR(bpmp);
- read_counters_wq = alloc_workqueue("read_counters_wq", __WQ_LEGACY, 1);
+ read_counters_wq = alloc_workqueue("read_counters_wq",
+ __WQ_LEGACY | WQ_PERCPU, 1);
if (!read_counters_wq) {
dev_err(&pdev->dev, "fail to create_workqueue\n");
err = -EINVAL;
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 56132e843c99..c7876e9e024f 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -184,20 +184,22 @@ static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
* cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
* @drv: cpuidle driver for the given CPU.
* @dev: cpuidle device for the given CPU.
+ * @latency_limit_ns: Idle state exit latency limit
*
* If there are states with the ->enter_s2idle callback, find the deepest of
* them and enter it with frozen tick.
*/
-int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ u64 latency_limit_ns)
{
int index;
/*
- * Find the deepest state with ->enter_s2idle present, which guarantees
- * that interrupts won't be enabled when it exits and allows the tick to
- * be frozen safely.
+ * Find the deepest state with ->enter_s2idle present that meets the
+ * specified latency limit, which guarantees that interrupts won't be
+ * enabled when it exits and allows the tick to be frozen safely.
*/
- index = find_deepest_state(drv, dev, U64_MAX, 0, true);
+ index = find_deepest_state(drv, dev, latency_limit_ns, 0, true);
if (index > 0) {
enter_s2idle_proper(drv, dev, index);
local_irq_enable();
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 9bbfa594c442..370664c47e65 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -8,6 +8,8 @@
* This code is licenced under the GPL.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/sched.h>
@@ -193,6 +195,14 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
s->exit_latency_ns = 0;
else
s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC);
+
+ /*
+ * Warn if the exit latency of a CPU idle state exceeds its
+ * target residency which is assumed to never happen in cpuidle
+ * in multiple places.
+ */
+ if (s->exit_latency_ns > s->target_residency_ns)
+ pr_warn("Idle state %d target residency too low\n", i);
}
}
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 0d0f9751ff8f..5d0e7f78c6c5 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -111,6 +111,10 @@ s64 cpuidle_governor_latency_req(unsigned int cpu)
struct device *device = get_cpu_device(cpu);
int device_req = dev_pm_qos_raw_resume_latency(device);
int global_req = cpu_latency_qos_limit();
+ int global_wake_req = cpu_wakeup_latency_qos_limit();
+
+ if (global_req > global_wake_req)
+ global_req = global_wake_req;
if (device_req > global_req)
device_req = global_req;
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 23239b0c04f9..64d6f7a1c776 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -317,12 +317,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
/*
- * Use a physical idle state, not busy polling, unless a timer
- * is going to trigger soon enough or the exit latency of the
- * idle state in question is greater than the predicted idle
- * duration.
+ * Use a physical idle state instead of busy polling so long as
+ * its target residency is below the residency threshold, its
+ * exit latency is not greater than the predicted idle duration,
+ * and the next timer doesn't expire soon.
*/
if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+ s->target_residency_ns < RESIDENCY_THRESHOLD_NS &&
s->target_residency_ns <= data->next_timer_ns &&
s->exit_latency_ns <= predicted_ns) {
predicted_ns = s->target_residency_ns;
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index bfa55c1eab5b..81ac5fd58a1c 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -76,7 +76,7 @@
* likely woken up by a non-timer wakeup source).
*
* 2. If the second sum computed in step 1 is greater than a half of the sum of
- * both metrics for the candidate state bin and all subsequent bins(if any),
+ * both metrics for the candidate state bin and all subsequent bins (if any),
* a shallower idle state is likely to be more suitable, so look for it.
*
* - Traverse the enabled idle states shallower than the candidate one in the
@@ -133,21 +133,33 @@ struct teo_bin {
* @sleep_length_ns: Time till the closest timer event (at the selection time).
* @state_bins: Idle state data bins for this CPU.
* @total: Grand total of the "intercepts" and "hits" metrics for all bins.
+ * @total_tick: Wakeups by the scheduler tick.
* @tick_intercepts: "Intercepts" before TICK_NSEC.
* @short_idles: Wakeups after short idle periods.
- * @artificial_wakeup: Set if the wakeup has been triggered by a safety net.
+ * @tick_wakeup: Set if the last wakeup was by the scheduler tick.
*/
struct teo_cpu {
s64 sleep_length_ns;
struct teo_bin state_bins[CPUIDLE_STATE_MAX];
unsigned int total;
+ unsigned int total_tick;
unsigned int tick_intercepts;
unsigned int short_idles;
- bool artificial_wakeup;
+ bool tick_wakeup;
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
+static void teo_decay(unsigned int *metric)
+{
+ unsigned int delta = *metric >> DECAY_SHIFT;
+
+ if (delta)
+ *metric -= delta;
+ else
+ *metric = 0;
+}
+
/**
* teo_update - Update CPU metrics after wakeup.
* @drv: cpuidle driver containing state data.
@@ -155,21 +167,22 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
*/
static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
- struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+ struct teo_cpu *cpu_data = this_cpu_ptr(&teo_cpus);
int i, idx_timer = 0, idx_duration = 0;
- s64 target_residency_ns;
- u64 measured_ns;
+ s64 target_residency_ns, measured_ns;
+ unsigned int total = 0;
- cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
+ teo_decay(&cpu_data->short_idles);
- if (cpu_data->artificial_wakeup) {
+ if (dev->poll_time_limit) {
+ dev->poll_time_limit = false;
/*
- * If one of the safety nets has triggered, assume that this
+ * Polling state timeout has triggered, so assume that this
* might have been a long sleep.
*/
- measured_ns = U64_MAX;
+ measured_ns = S64_MAX;
} else {
- u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
+ s64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
measured_ns = dev->last_residency_ns;
/*
@@ -196,8 +209,10 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
for (i = 0; i < drv->state_count; i++) {
struct teo_bin *bin = &cpu_data->state_bins[i];
- bin->hits -= bin->hits >> DECAY_SHIFT;
- bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
+ teo_decay(&bin->hits);
+ total += bin->hits;
+ teo_decay(&bin->intercepts);
+ total += bin->intercepts;
target_residency_ns = drv->states[i].target_residency_ns;
@@ -208,7 +223,24 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
}
}
- cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
+ cpu_data->total = total + PULSE;
+
+ teo_decay(&cpu_data->tick_intercepts);
+
+ teo_decay(&cpu_data->total_tick);
+ if (cpu_data->tick_wakeup) {
+ cpu_data->total_tick += PULSE;
+ /*
+ * If tick wakeups dominate the wakeup pattern (that is, they account
+ * for more than two thirds of all wakeups), count this one as a hit
+ * on the deepest available idle state to increase the likelihood of
+ * stopping the tick.
+ */
+ if (3 * cpu_data->total_tick > 2 * cpu_data->total) {
+ cpu_data->state_bins[drv->state_count - 1].hits += PULSE;
+ return;
+ }
+ }
+
/*
* If the measured idle duration falls into the same bin as the sleep
* length, this is a "hit", so update the "hits" metric for that bin.
@@ -219,18 +251,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
cpu_data->state_bins[idx_timer].hits += PULSE;
} else {
cpu_data->state_bins[idx_duration].intercepts += PULSE;
- if (TICK_NSEC <= measured_ns)
+ if (measured_ns <= TICK_NSEC)
cpu_data->tick_intercepts += PULSE;
}
-
- cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
- cpu_data->total += PULSE;
-}
-
-static bool teo_state_ok(int i, struct cpuidle_driver *drv)
-{
- return !tick_nohz_tick_stopped() ||
- drv->states[i].target_residency_ns >= TICK_NSEC;
}
/**
@@ -239,17 +262,15 @@ static bool teo_state_ok(int i, struct cpuidle_driver *drv)
* @dev: Target CPU.
* @state_idx: Index of the capping idle state.
* @duration_ns: Idle duration value to match.
- * @no_poll: Don't consider polling states.
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
- s64 duration_ns, bool no_poll)
+ s64 duration_ns)
{
int i;
for (i = state_idx - 1; i >= 0; i--) {
- if (dev->states_usage[i].disable ||
- (no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING))
+ if (dev->states_usage[i].disable)
continue;
state_idx = i;
@@ -268,7 +289,7 @@ static int teo_find_shallower_state(struct cpuidle_driver *drv,
static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
{
- struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+ struct teo_cpu *cpu_data = this_cpu_ptr(&teo_cpus);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
ktime_t delta_tick = TICK_NSEC / 2;
unsigned int idx_intercept_sum = 0;
@@ -356,7 +377,18 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* better choice.
*/
if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
- int first_suitable_idx = idx;
+ int min_idx = idx0;
+
+ if (tick_nohz_tick_stopped()) {
+ /*
+ * Look for the shallowest idle state below the current
+ * candidate one whose target residency is at least
+ * equal to the tick period length.
+ */
+ while (min_idx < idx &&
+ drv->states[min_idx].target_residency_ns < TICK_NSEC)
+ min_idx++;
+ }
/*
* Look for the deepest idle state whose target residency had
@@ -366,49 +398,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* Take into account the possible duration limitation that is
* present if the tick has been stopped already.
*/
- intercept_sum = 0;
-
- for (i = idx - 1; i >= 0; i--) {
- struct teo_bin *bin = &cpu_data->state_bins[i];
-
- intercept_sum += bin->intercepts;
-
- if (2 * intercept_sum > idx_intercept_sum) {
- /*
- * Use the current state unless it is too
- * shallow or disabled, in which case take the
- * first enabled state that is deep enough.
- */
- if (teo_state_ok(i, drv) &&
- !dev->states_usage[i].disable) {
- idx = i;
- break;
- }
- idx = first_suitable_idx;
- break;
- }
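+ /*
+ * Step down through the enabled shallower states and stop as soon
+ * as the accumulated intercepts exceed half of all intercepts below
+ * the original candidate.
+ */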
+ for (i = idx - 1, intercept_sum = 0; i >= min_idx; i--) {
+ intercept_sum += cpu_data->state_bins[i].intercepts;
if (dev->states_usage[i].disable)
continue;
- if (teo_state_ok(i, drv)) {
- /*
- * The current state is deep enough, but still
- * there may be a better one.
- */
- first_suitable_idx = i;
- continue;
- }
-
- /*
- * The current state is too shallow, so if no suitable
- * states other than the initial candidate have been
- * found, give up (the remaining states to check are
- * shallower still), but otherwise the first suitable
- * state other than the initial candidate may turn out
- * to be preferable.
- */
- if (first_suitable_idx == idx)
+ idx = i;
+ if (2 * intercept_sum > idx_intercept_sum)
break;
}
}
@@ -458,11 +455,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* If the closest expected timer is before the target residency of the
* candidate state, a shallower one needs to be found.
*/
- if (drv->states[idx].target_residency_ns > duration_ns) {
- i = teo_find_shallower_state(drv, dev, idx, duration_ns, false);
- if (teo_state_ok(i, drv))
- idx = i;
- }
+ if (drv->states[idx].target_residency_ns > duration_ns)
+ idx = teo_find_shallower_state(drv, dev, idx, duration_ns);
/*
* If the selected state's target residency is below the tick length
@@ -490,7 +484,7 @@ end:
*/
if (idx > idx0 &&
drv->states[idx].target_residency_ns > delta_tick)
- idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
+ idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
out_tick:
*stop_tick = false;
@@ -504,20 +498,11 @@ out_tick:
*/
static void teo_reflect(struct cpuidle_device *dev, int state)
{
- struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+ struct teo_cpu *cpu_data = this_cpu_ptr(&teo_cpus);
+
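+ /* Record whether or not the scheduler tick caused this wakeup. */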
+ cpu_data->tick_wakeup = tick_nohz_idle_got_tick();
dev->last_state_idx = state;
- if (dev->poll_time_limit ||
- (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
- /*
- * The wakeup was not "genuine", but triggered by one of the
- * safety nets.
- */
- dev->poll_time_limit = false;
- cpu_data->artificial_wakeup = true;
- } else {
- cpu_data->artificial_wakeup = false;
- }
}
/**
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 9b6d90a72601..c7524e4c522a 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -4,9 +4,13 @@
*/
#include <linux/cpuidle.h>
+#include <linux/export.h>
+#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/idle.h>
+#include <linux/sprintf.h>
+#include <linux/types.h>
#define POLL_IDLE_RELAX_COUNT 200
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 2e8d01d47f69..00979f2e0e27 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -20,6 +20,7 @@
#include <linux/stat.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>
+#include <linux/devfreq-governor.h>
#include <linux/workqueue.h>
#include <linux/platform_device.h>
#include <linux/list.h>
@@ -28,7 +29,6 @@
#include <linux/of.h>
#include <linux/pm_qos.h>
#include <linux/units.h>
-#include "governor.h"
#define CREATE_TRACE_POINTS
#include <trace/events/devfreq.h>
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
deleted file mode 100644
index 0adfebc0467a..000000000000
--- a/drivers/devfreq/governor.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * governor.h - internal header for devfreq governors.
- *
- * Copyright (C) 2011 Samsung Electronics
- * MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * This header is for devfreq governors in drivers/devfreq/
- */
-
-#ifndef _GOVERNOR_H
-#define _GOVERNOR_H
-
-#include <linux/devfreq.h>
-
-#define DEVFREQ_NAME_LEN 16
-
-#define to_devfreq(DEV) container_of((DEV), struct devfreq, dev)
-
-/* Devfreq events */
-#define DEVFREQ_GOV_START 0x1
-#define DEVFREQ_GOV_STOP 0x2
-#define DEVFREQ_GOV_UPDATE_INTERVAL 0x3
-#define DEVFREQ_GOV_SUSPEND 0x4
-#define DEVFREQ_GOV_RESUME 0x5
-
-#define DEVFREQ_MIN_FREQ 0
-#define DEVFREQ_MAX_FREQ ULONG_MAX
-
-/*
- * Definition of the governor feature flags
- * - DEVFREQ_GOV_FLAG_IMMUTABLE
- * : This governor is never changeable to other governors.
- * - DEVFREQ_GOV_FLAG_IRQ_DRIVEN
- * : The devfreq won't schedule the work for this governor.
- */
-#define DEVFREQ_GOV_FLAG_IMMUTABLE BIT(0)
-#define DEVFREQ_GOV_FLAG_IRQ_DRIVEN BIT(1)
-
-/*
- * Definition of governor attribute flags except for common sysfs attributes
- * - DEVFREQ_GOV_ATTR_POLLING_INTERVAL
- * : Indicate polling_interval sysfs attribute
- * - DEVFREQ_GOV_ATTR_TIMER
- * : Indicate timer sysfs attribute
- */
-#define DEVFREQ_GOV_ATTR_POLLING_INTERVAL BIT(0)
-#define DEVFREQ_GOV_ATTR_TIMER BIT(1)
-
-/**
- * struct devfreq_cpu_data - Hold the per-cpu data
- * @node: list node
- * @dev: reference to cpu device.
- * @first_cpu: the cpumask of the first cpu of a policy.
- * @opp_table: reference to cpu opp table.
- * @cur_freq: the current frequency of the cpu.
- * @min_freq: the min frequency of the cpu.
- * @max_freq: the max frequency of the cpu.
- *
- * This structure stores the required cpu_data of a cpu.
- * This is auto-populated by the governor.
- */
-struct devfreq_cpu_data {
- struct list_head node;
-
- struct device *dev;
- unsigned int first_cpu;
-
- struct opp_table *opp_table;
- unsigned int cur_freq;
- unsigned int min_freq;
- unsigned int max_freq;
-};
-
-/**
- * struct devfreq_governor - Devfreq policy governor
- * @node: list node - contains registered devfreq governors
- * @name: Governor's name
- * @attrs: Governor's sysfs attribute flags
- * @flags: Governor's feature flags
- * @get_target_freq: Returns desired operating frequency for the device.
- * Basically, get_target_freq will run
- * devfreq_dev_profile.get_dev_status() to get the
- * status of the device (load = busy_time / total_time).
- * @event_handler: Callback for devfreq core framework to notify events
- * to governors. Events include per device governor
- * init and exit, opp changes out of devfreq, suspend
- * and resume of per device devfreq during device idle.
- *
- * Note that the callbacks are called with devfreq->lock locked by devfreq.
- */
-struct devfreq_governor {
- struct list_head node;
-
- const char name[DEVFREQ_NAME_LEN];
- const u64 attrs;
- const u64 flags;
- int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
- int (*event_handler)(struct devfreq *devfreq,
- unsigned int event, void *data);
-};
-
-void devfreq_monitor_start(struct devfreq *devfreq);
-void devfreq_monitor_stop(struct devfreq *devfreq);
-void devfreq_monitor_suspend(struct devfreq *devfreq);
-void devfreq_monitor_resume(struct devfreq *devfreq);
-void devfreq_update_interval(struct devfreq *devfreq, unsigned int *delay);
-
-int devfreq_add_governor(struct devfreq_governor *governor);
-int devfreq_remove_governor(struct devfreq_governor *governor);
-
-int devm_devfreq_add_governor(struct device *dev,
- struct devfreq_governor *governor);
-
-int devfreq_update_status(struct devfreq *devfreq, unsigned long freq);
-int devfreq_update_target(struct devfreq *devfreq, unsigned long freq);
-void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq,
- unsigned long *max_freq);
-
-static inline int devfreq_update_stats(struct devfreq *df)
-{
- if (!df->profile->get_dev_status)
- return -EINVAL;
-
- return df->profile->get_dev_status(df->dev.parent, &df->last_status);
-}
-#endif /* _GOVERNOR_H */
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index 953cf9a1e9f7..8cd6f9a59f64 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -14,8 +14,33 @@
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/devfreq.h>
+#include <linux/devfreq-governor.h>
#include <linux/units.h>
-#include "governor.h"
+
+/**
+ * struct devfreq_cpu_data - Hold the per-cpu data
+ * @node: list node
+ * @dev: reference to cpu device.
+ * @first_cpu: the first cpu of a cpufreq policy.
+ * @opp_table: reference to cpu opp table.
+ * @cur_freq: the current frequency of the cpu.
+ * @min_freq: the min frequency of the cpu.
+ * @max_freq: the max frequency of the cpu.
+ *
+ * This structure stores the required cpu_data of a cpu.
+ * This is auto-populated by the governor.
+ */
+struct devfreq_cpu_data {
+ struct list_head node;
+
+ struct device *dev;
+ unsigned int first_cpu;
+
+ struct opp_table *opp_table;
+ unsigned int cur_freq;
+ unsigned int min_freq;
+ unsigned int max_freq;
+};
static struct devfreq_cpu_data *
get_parent_cpu_data(struct devfreq_passive_data *p_data,
diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c
index 2e4e981446fa..fdb22bf512cf 100644
--- a/drivers/devfreq/governor_performance.c
+++ b/drivers/devfreq/governor_performance.c
@@ -7,8 +7,8 @@
*/
#include <linux/devfreq.h>
+#include <linux/devfreq-governor.h>
#include <linux/module.h>
-#include "governor.h"
static int devfreq_performance_func(struct devfreq *df,
unsigned long *freq)
diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c
index f059e8814804..ee2d6ec8a512 100644
--- a/drivers/devfreq/governor_powersave.c
+++ b/drivers/devfreq/governor_powersave.c
@@ -7,8 +7,8 @@
*/
#include <linux/devfreq.h>
+#include <linux/devfreq-governor.h>
#include <linux/module.h>
-#include "governor.h"
static int devfreq_powersave_func(struct devfreq *df,
unsigned long *freq)
diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
index c23435736367..ac9c5e9e51a4 100644
--- a/drivers/devfreq/governor_simpleondemand.c
+++ b/drivers/devfreq/governor_simpleondemand.c
@@ -9,12 +9,12 @@
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/devfreq.h>
+#include <linux/devfreq-governor.h>
#include <linux/math64.h>
-#include "governor.h"
/* Default constants for DevFreq-Simple-Ondemand (DFSO) */
#define DFSO_UPTHRESHOLD (90)
-#define DFSO_DOWNDIFFERENCTIAL (5)
+#define DFSO_DOWNDIFFERENTIAL (5)
static int devfreq_simple_ondemand_func(struct devfreq *df,
unsigned long *freq)
{
@@ -22,7 +22,7 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
struct devfreq_dev_status *stat;
unsigned long long a, b;
unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD;
- unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL;
+ unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENTIAL;
struct devfreq_simple_ondemand_data *data = df->data;
err = devfreq_update_stats(df);
diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 175de0c0b50e..395174f93960 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -9,11 +9,11 @@
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/devfreq.h>
+#include <linux/devfreq-governor.h>
#include <linux/kstrtox.h>
#include <linux/pm.h>
#include <linux/mutex.h>
#include <linux/module.h>
-#include "governor.h"
struct userspace_data {
unsigned long user_frequency;
diff --git a/drivers/devfreq/hisi_uncore_freq.c b/drivers/devfreq/hisi_uncore_freq.c
index 96d1815059e3..4d00d813c8ac 100644
--- a/drivers/devfreq/hisi_uncore_freq.c
+++ b/drivers/devfreq/hisi_uncore_freq.c
@@ -9,6 +9,7 @@
#include <linux/bits.h>
#include <linux/cleanup.h>
#include <linux/devfreq.h>
+#include <linux/devfreq-governor.h>
#include <linux/device.h>
#include <linux/dev_printk.h>
#include <linux/errno.h>
@@ -26,8 +27,6 @@
#include <linux/units.h>
#include <acpi/pcc.h>
-#include "governor.h"
-
struct hisi_uncore_pcc_data {
u16 status;
u16 resv;
@@ -265,10 +264,11 @@ static int hisi_uncore_target(struct device *dev, unsigned long *freq,
dev_err(dev, "Failed to get opp for freq %lu hz\n", *freq);
return PTR_ERR(opp);
}
- dev_pm_opp_put(opp);
data = (u32)(dev_pm_opp_get_freq(opp) / HZ_PER_MHZ);
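+ /* Drop the OPP reference only after its frequency has been read. */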
+ dev_pm_opp_put(opp);
+
return hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_FREQ, &data);
}
diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index 4a4f0106ab9d..8b57194ac698 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -9,9 +9,11 @@
#include <linux/clk.h>
#include <linux/cpufreq.h>
#include <linux/devfreq.h>
+#include <linux/devfreq-governor.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
@@ -21,8 +23,6 @@
#include <soc/tegra/fuse.h>
-#include "governor.h"
-
#define ACTMON_GLB_STATUS 0x0
#define ACTMON_GLB_PERIOD_CTRL 0x4
@@ -326,14 +326,9 @@ static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
unsigned int i;
const struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
- for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++) {
- if (cpu_freq >= ratio->cpu_freq) {
- if (ratio->emc_freq >= tegra->max_freq)
- return tegra->max_freq;
- else
- return ratio->emc_freq;
- }
- }
+ for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++)
+ if (cpu_freq >= ratio->cpu_freq)
+ return min(ratio->emc_freq, tegra->max_freq);
return 0;
}
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index bba4f7daff8c..dbebb8c829bc 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -309,9 +309,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_is_turbo);
*/
unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
{
- struct opp_table *opp_table __free(put_opp_table);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
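+ /* A __free() variable should be initialized where it is declared. */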
- opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return 0;
@@ -327,7 +327,6 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
*/
unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
{
- struct opp_table *opp_table __free(put_opp_table);
struct dev_pm_opp *opp;
struct regulator *reg;
unsigned long latency_ns = 0;
@@ -337,7 +336,9 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
unsigned long max;
} *uV;
- opp_table = _find_opp_table(dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
+
if (IS_ERR(opp_table))
return 0;
@@ -409,10 +410,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency);
*/
unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev)
{
- struct opp_table *opp_table __free(put_opp_table);
unsigned long freq = 0;
- opp_table = _find_opp_table(dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
+
if (IS_ERR(opp_table))
return 0;
@@ -447,9 +449,9 @@ int _get_opp_count(struct opp_table *opp_table)
*/
int dev_pm_opp_get_opp_count(struct device *dev)
{
- struct opp_table *opp_table __free(put_opp_table);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
- opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
dev_dbg(dev, "%s: OPP table not found (%ld)\n",
__func__, PTR_ERR(opp_table));
@@ -605,9 +607,9 @@ _find_key(struct device *dev, unsigned long *key, int index, bool available,
unsigned long opp_key, unsigned long key),
bool (*assert)(struct opp_table *opp_table, unsigned int index))
{
- struct opp_table *opp_table __free(put_opp_table);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
- opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
dev_err(dev, "%s: OPP table not found (%ld)\n", __func__,
PTR_ERR(opp_table));
@@ -1410,12 +1412,13 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
*/
int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
{
- struct opp_table *opp_table __free(put_opp_table);
struct dev_pm_opp *opp __free(put_opp) = NULL;
unsigned long freq = 0, temp_freq;
bool forced = false;
- opp_table = _find_opp_table(dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
+
if (IS_ERR(opp_table)) {
dev_err(dev, "%s: device's opp table doesn't exist\n", __func__);
return PTR_ERR(opp_table);
@@ -1477,9 +1480,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
*/
int dev_pm_opp_set_opp(struct device *dev, struct dev_pm_opp *opp)
{
- struct opp_table *opp_table __free(put_opp_table);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
- opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
dev_err(dev, "%s: device opp doesn't exist\n", __func__);
return PTR_ERR(opp_table);
@@ -1794,10 +1797,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put);
*/
void dev_pm_opp_remove(struct device *dev, unsigned long freq)
{
- struct opp_table *opp_table __free(put_opp_table);
struct dev_pm_opp *opp = NULL, *iter;
- opp_table = _find_opp_table(dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
+
if (IS_ERR(opp_table))
return;
@@ -1885,9 +1889,9 @@ bool _opp_remove_all_static(struct opp_table *opp_table)
*/
void dev_pm_opp_remove_all_dynamic(struct device *dev)
{
- struct opp_table *opp_table __free(put_opp_table);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
- opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return;
@@ -2871,10 +2875,11 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
bool availability_req)
{
struct dev_pm_opp *opp __free(put_opp) = ERR_PTR(-ENODEV), *tmp_opp;
- struct opp_table *opp_table __free(put_opp_table);
/* Find the opp_table */
- opp_table = _find_opp_table(dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
+
if (IS_ERR(opp_table)) {
dev_warn(dev, "%s: Device OPP not found (%ld)\n", __func__,
PTR_ERR(opp_table));
@@ -2932,11 +2937,12 @@ int dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq,
{
struct dev_pm_opp *opp __free(put_opp) = ERR_PTR(-ENODEV), *tmp_opp;
- struct opp_table *opp_table __free(put_opp_table);
int r;
/* Find the opp_table */
- opp_table = _find_opp_table(dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
+
if (IS_ERR(opp_table)) {
r = PTR_ERR(opp_table);
dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
@@ -2986,12 +2992,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_adjust_voltage);
*/
int dev_pm_opp_sync_regulators(struct device *dev)
{
- struct opp_table *opp_table __free(put_opp_table);
struct regulator *reg;
int ret, i;
/* Device may not have OPP table */
- opp_table = _find_opp_table(dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
+
if (IS_ERR(opp_table))
return 0;
@@ -3062,9 +3069,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
*/
int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb)
{
- struct opp_table *opp_table __free(put_opp_table);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
- opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return PTR_ERR(opp_table);
@@ -3082,9 +3089,9 @@ EXPORT_SYMBOL(dev_pm_opp_register_notifier);
int dev_pm_opp_unregister_notifier(struct device *dev,
struct notifier_block *nb)
{
- struct opp_table *opp_table __free(put_opp_table);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
- opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table))
return PTR_ERR(opp_table);
@@ -3101,10 +3108,10 @@ EXPORT_SYMBOL(dev_pm_opp_unregister_notifier);
*/
void dev_pm_opp_remove_table(struct device *dev)
{
- struct opp_table *opp_table __free(put_opp_table);
-
/* Check for existing table for 'dev' */
- opp_table = _find_opp_table(dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(dev);
+
if (IS_ERR(opp_table)) {
int error = PTR_ERR(opp_table);
diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c
index 97989d4fe336..a6da7ee3ec76 100644
--- a/drivers/opp/cpu.c
+++ b/drivers/opp/cpu.c
@@ -56,10 +56,10 @@ int dev_pm_opp_init_cpufreq_table(struct device *dev,
return -ENOMEM;
for (i = 0, rate = 0; i < max_opps; i++, rate++) {
- struct dev_pm_opp *opp __free(put_opp);
-
/* find next rate */
- opp = dev_pm_opp_find_freq_ceil(dev, &rate);
+ struct dev_pm_opp *opp __free(put_opp) =
+ dev_pm_opp_find_freq_ceil(dev, &rate);
+
if (IS_ERR(opp)) {
ret = PTR_ERR(opp);
goto out;
@@ -154,12 +154,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table);
int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
const struct cpumask *cpumask)
{
- struct opp_table *opp_table __free(put_opp_table);
struct opp_device *opp_dev;
struct device *dev;
int cpu;
- opp_table = _find_opp_table(cpu_dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(cpu_dev);
+
if (IS_ERR(opp_table))
return PTR_ERR(opp_table);
@@ -201,10 +202,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
*/
int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
{
- struct opp_table *opp_table __free(put_opp_table);
struct opp_device *opp_dev;
- opp_table = _find_opp_table(cpu_dev);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_opp_table(cpu_dev);
+
if (IS_ERR(opp_table))
return PTR_ERR(opp_table);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 505d79821584..1e0d0adb18e1 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -45,9 +45,10 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node);
struct opp_table *_managed_opp(struct device *dev, int index)
{
struct opp_table *opp_table, *managed_table = NULL;
- struct device_node *np __free(device_node);
- np = _opp_of_get_opp_desc_node(dev->of_node, index);
+ struct device_node *np __free(device_node) =
+ _opp_of_get_opp_desc_node(dev->of_node, index);
+
if (!np)
return NULL;
@@ -95,10 +96,11 @@ static struct device_node *of_parse_required_opp(struct device_node *np,
/* The caller must call dev_pm_opp_put_opp_table() after the table is used */
static struct opp_table *_find_table_of_opp_np(struct device_node *opp_np)
{
- struct device_node *opp_table_np __free(device_node);
struct opp_table *opp_table;
- opp_table_np = of_get_parent(opp_np);
+ struct device_node *opp_table_np __free(device_node) =
+ of_get_parent(opp_np);
+
if (!opp_table_np)
return ERR_PTR(-ENODEV);
@@ -146,12 +148,13 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
struct device_node *opp_np)
{
struct opp_table **required_opp_tables;
- struct device_node *np __free(device_node);
bool lazy = false;
int count, i, size;
/* Traversing the first OPP node is all we need */
- np = of_get_next_available_child(opp_np, NULL);
+ struct device_node *np __free(device_node) =
+ of_get_next_available_child(opp_np, NULL);
+
if (!np) {
dev_warn(dev, "Empty OPP table\n");
return;
@@ -171,9 +174,9 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
opp_table->required_opp_count = count;
for (i = 0; i < count; i++) {
- struct device_node *required_np __free(device_node);
+ struct device_node *required_np __free(device_node) =
+ of_parse_required_opp(np, i);
- required_np = of_parse_required_opp(np, i);
if (!required_np) {
_opp_table_free_required_tables(opp_table);
return;
@@ -199,14 +202,15 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
int index)
{
- struct device_node *np __free(device_node), *opp_np;
+ struct device_node *opp_np;
u32 val;
/*
* Only required for backward compatibility with v1 bindings, but isn't
* harmful for other cases. And so we do it unconditionally.
*/
- np = of_node_get(dev->of_node);
+ struct device_node *np __free(device_node) = of_node_get(dev->of_node);
+
if (!np)
return;
@@ -273,9 +277,9 @@ void _of_clear_opp(struct opp_table *opp_table, struct dev_pm_opp *opp)
static int _link_required_opps(struct dev_pm_opp *opp,
struct opp_table *required_table, int index)
{
- struct device_node *np __free(device_node);
+ struct device_node *np __free(device_node) =
+ of_parse_required_opp(opp->np, index);
- np = of_parse_required_opp(opp->np, index);
if (unlikely(!np))
return -ENODEV;
@@ -349,16 +353,13 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
guard(mutex)(&opp_table_lock);
list_for_each_entry_safe(opp_table, temp, &lazy_opp_tables, lazy) {
- struct device_node *opp_np __free(device_node);
bool lazy = false;
/* opp_np can't be invalid here */
- opp_np = of_get_next_available_child(opp_table->np, NULL);
+ struct device_node *opp_np __free(device_node) =
+ of_get_next_available_child(opp_table->np, NULL);
for (i = 0; i < opp_table->required_opp_count; i++) {
- struct device_node *required_np __free(device_node) = NULL;
- struct device_node *required_table_np __free(device_node) = NULL;
-
required_opp_tables = opp_table->required_opp_tables;
/* Required opp-table is already parsed */
@@ -366,8 +367,10 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
continue;
/* required_np can't be invalid here */
- required_np = of_parse_required_opp(opp_np, i);
- required_table_np = of_get_parent(required_np);
+ struct device_node *required_np __free(device_node) =
+ of_parse_required_opp(opp_np, i);
+ struct device_node *required_table_np __free(device_node) =
+ of_get_parent(required_np);
/*
* Newly added table isn't the required opp-table for
@@ -402,13 +405,12 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table)
{
struct device_node *opp_np __free(device_node) = NULL;
- struct device_node *np __free(device_node) = NULL;
struct property *prop;
if (!opp_table) {
- struct device_node *np __free(device_node);
+ struct device_node *np __free(device_node) =
+ of_node_get(dev->of_node);
- np = of_node_get(dev->of_node);
if (!np)
return -ENODEV;
@@ -422,7 +424,9 @@ static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table)
return 0;
/* Checking only first OPP is sufficient */
- np = of_get_next_available_child(opp_np, NULL);
+ struct device_node *np __free(device_node) =
+ of_get_next_available_child(opp_np, NULL);
+
if (!np) {
dev_err(dev, "OPP table empty\n");
return -EINVAL;
@@ -1269,11 +1273,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
struct cpumask *cpumask)
{
- struct device_node *np __free(device_node);
int cpu;
/* Get OPP descriptor node */
- np = dev_pm_opp_of_get_opp_desc_node(cpu_dev);
+ struct device_node *np __free(device_node) =
+ dev_pm_opp_of_get_opp_desc_node(cpu_dev);
+
if (!np) {
dev_dbg(cpu_dev, "%s: Couldn't find opp node.\n", __func__);
return -ENOENT;
@@ -1286,13 +1291,12 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
return 0;
for_each_possible_cpu(cpu) {
- struct device_node *cpu_np __free(device_node) = NULL;
- struct device_node *tmp_np __free(device_node) = NULL;
-
if (cpu == cpu_dev->id)
continue;
- cpu_np = of_cpu_device_node_get(cpu);
+ struct device_node *cpu_np __free(device_node) =
+ of_cpu_device_node_get(cpu);
+
if (!cpu_np) {
dev_err(cpu_dev, "%s: failed to get cpu%d node\n",
__func__, cpu);
@@ -1300,7 +1304,9 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
}
/* Get OPP descriptor node */
- tmp_np = _opp_of_get_opp_desc_node(cpu_np, 0);
+ struct device_node *tmp_np __free(device_node) =
+ _opp_of_get_opp_desc_node(cpu_np, 0);
+
if (!tmp_np) {
pr_err("%pOF: Couldn't find opp node\n", cpu_np);
return -ENOENT;
@@ -1328,16 +1334,17 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
*/
int of_get_required_opp_performance_state(struct device_node *np, int index)
{
- struct device_node *required_np __free(device_node);
- struct opp_table *opp_table __free(put_opp_table) = NULL;
- struct dev_pm_opp *opp __free(put_opp) = NULL;
int pstate = -EINVAL;
- required_np = of_parse_required_opp(np, index);
+ struct device_node *required_np __free(device_node) =
+ of_parse_required_opp(np, index);
+
if (!required_np)
return -ENODEV;
- opp_table = _find_table_of_opp_np(required_np);
+ struct opp_table *opp_table __free(put_opp_table) =
+ _find_table_of_opp_np(required_np);
+
if (IS_ERR(opp_table)) {
pr_err("%s: Failed to find required OPP table %pOF: %ld\n",
__func__, np, PTR_ERR(opp_table));
@@ -1350,7 +1357,9 @@ int of_get_required_opp_performance_state(struct device_node *np, int index)
return -EINVAL;
}
- opp = _find_opp_of_np(opp_table, required_np);
+ struct dev_pm_opp *opp __free(put_opp) =
+ _find_opp_of_np(opp_table, required_np);
+
if (opp) {
if (opp->level == OPP_LEVEL_UNSET) {
pr_err("%s: OPP levels aren't available for %pOF\n",
@@ -1376,14 +1385,17 @@ EXPORT_SYMBOL_GPL(of_get_required_opp_performance_state);
*/
bool dev_pm_opp_of_has_required_opp(struct device *dev)
{
- struct device_node *np __free(device_node) = NULL, *opp_np __free(device_node);
int count;
- opp_np = _opp_of_get_opp_desc_node(dev->of_node, 0);
+ struct device_node *opp_np __free(device_node) =
+ _opp_of_get_opp_desc_node(dev->of_node, 0);
+
if (!opp_np)
return false;
- np = of_get_next_available_child(opp_np, NULL);
+ struct device_node *np __free(device_node) =
+ of_get_next_available_child(opp_np, NULL);
+
if (!np) {
dev_warn(dev, "Empty OPP table\n");
return false;
@@ -1425,12 +1437,14 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
static int __maybe_unused
_get_dt_power(struct device *dev, unsigned long *uW, unsigned long *kHz)
{
- struct dev_pm_opp *opp __free(put_opp);
unsigned long opp_freq, opp_power;
/* Find the right frequency and related OPP */
opp_freq = *kHz * 1000;
- opp = dev_pm_opp_find_freq_ceil(dev, &opp_freq);
+
+ struct dev_pm_opp *opp __free(put_opp) =
+ dev_pm_opp_find_freq_ceil(dev, &opp_freq);
+
if (IS_ERR(opp))
return -EINVAL;
@@ -1465,14 +1479,13 @@ _get_dt_power(struct device *dev, unsigned long *uW, unsigned long *kHz)
int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW,
unsigned long *kHz)
{
- struct dev_pm_opp *opp __free(put_opp) = NULL;
- struct device_node *np __free(device_node);
unsigned long mV, Hz;
u32 cap;
u64 tmp;
int ret;
- np = of_node_get(dev->of_node);
+ struct device_node *np __free(device_node) = of_node_get(dev->of_node);
+
if (!np)
return -EINVAL;
@@ -1481,7 +1494,10 @@ int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW,
return -EINVAL;
Hz = *kHz * 1000;
- opp = dev_pm_opp_find_freq_ceil(dev, &Hz);
+
+ struct dev_pm_opp *opp __free(put_opp) =
+ dev_pm_opp_find_freq_ceil(dev, &Hz);
+
if (IS_ERR(opp))
return -EINVAL;
@@ -1502,11 +1518,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_calc_power);
static bool _of_has_opp_microwatt_property(struct device *dev)
{
- struct dev_pm_opp *opp __free(put_opp);
unsigned long freq = 0;
/* Check if at least one OPP has needed property */
- opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ struct dev_pm_opp *opp __free(put_opp) =
+ dev_pm_opp_find_freq_ceil(dev, &freq);
+
if (IS_ERR(opp))
return false;
@@ -1526,12 +1543,16 @@ static bool _of_has_opp_microwatt_property(struct device *dev)
*/
int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus)
{
- struct device_node *np __free(device_node) = NULL;
struct em_data_callback em_cb;
int ret, nr_opp;
u32 cap;
- if (IS_ERR_OR_NULL(dev)) {
+ if (IS_ERR_OR_NULL(dev))
+ return -EINVAL;
+
+ struct device_node *np __free(device_node) = of_node_get(dev->of_node);
+
+ if (!np) {
ret = -EINVAL;
goto failed;
}
@@ -1548,12 +1569,6 @@ int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus)
goto register_em;
}
- np = of_node_get(dev->of_node);
- if (!np) {
- ret = -EINVAL;
- goto failed;
- }
-
/*
* Register an EM only if the 'dynamic-power-coefficient' property is
* set in devicetree. It is assumed the voltage values are known if that
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 9d6f74bd95f8..3881359440b1 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1517,8 +1517,8 @@ static ssize_t reset_method_store(struct device *dev,
return count;
}
- ACQUIRE(pm_runtime_active_try, pm)(dev);
- if (ACQUIRE_ERR(pm_runtime_active_try, &pm))
+ PM_RUNTIME_ACQUIRE(dev, pm);
+ if (PM_RUNTIME_ACQUIRE_ERR(&pm))
return -ENXIO;
if (sysfs_streq(buf, "default")) {
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index 61c2277c9ce3..4fd546ef0448 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -1425,8 +1425,14 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
return;
}
- /* Choose the deepest state when suspending */
- genpd->state_idx = genpd->state_count - 1;
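+ /* Let the governor choose the state when suspending, if it can. */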
+ if (genpd->gov && genpd->gov->system_power_down_ok) {
+ if (!genpd->gov->system_power_down_ok(&genpd->domain))
+ return;
+ } else {
+ /* Default to the deepest state. */
+ genpd->state_idx = genpd->state_count - 1;
+ }
+
if (_genpd_power_off(genpd, false)) {
genpd->states[genpd->state_idx].rejected++;
return;
diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c
index 39359811a930..05e68680f34b 100644
--- a/drivers/pmdomain/governor.c
+++ b/drivers/pmdomain/governor.c
@@ -351,7 +351,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
ktime_t domain_wakeup, next_hrtimer;
ktime_t now = ktime_get();
struct device *cpu_dev;
- s64 cpu_constraint, global_constraint;
+ s64 cpu_constraint, global_constraint, wakeup_constraint;
s64 idle_duration_ns;
int cpu, i;
@@ -362,7 +362,11 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
return true;
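+ /* Honor the stricter of the two CPU latency QoS limits. */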
+ wakeup_constraint = cpu_wakeup_latency_qos_limit();
global_constraint = cpu_latency_qos_limit();
+ if (global_constraint > wakeup_constraint)
+ global_constraint = wakeup_constraint;
+
/*
* Find the next wakeup for any of the online CPUs within the PM domain
* and its subdomains. Note, we only need the genpd->cpus, as it already
@@ -415,9 +419,36 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
return false;
}
+static bool cpu_system_power_down_ok(struct dev_pm_domain *pd)
+{
+ s64 constraint_ns = cpu_wakeup_latency_qos_limit() * NSEC_PER_USEC;
+ struct generic_pm_domain *genpd = pd_to_genpd(pd);
+ int state_idx = genpd->state_count - 1;
+
+ if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) {
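+ /* Not a CPU domain, so no CPU wakeup latency constraint applies. */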
+ genpd->state_idx = state_idx;
+ return true;
+ }
+
+ /* Find the deepest state that satisfies the latency constraint. */
+ while (state_idx >= 0) {
+ s64 latency_ns = genpd->states[state_idx].power_off_latency_ns +
+ genpd->states[state_idx].power_on_latency_ns;
+
+ if (latency_ns <= constraint_ns) {
+ genpd->state_idx = state_idx;
+ return true;
+ }
+ state_idx--;
+ }
+
+ return false;
+}
+
struct dev_power_governor pm_domain_cpu_gov = {
.suspend_ok = default_suspend_ok,
.power_down_ok = cpu_power_down_ok,
+ .system_power_down_ok = cpu_system_power_down_ok,
};
#endif
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index c7e7f9bf5313..b9d87e56cbbc 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -253,7 +253,8 @@ struct rapl_primitive_info {
static void rapl_init_domains(struct rapl_package *rp);
static int rapl_read_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
- bool xlate, u64 *data);
+ bool xlate, u64 *data,
+ bool atomic);
static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value);
@@ -289,7 +290,7 @@ static int get_energy_counter(struct powercap_zone *power_zone,
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
- if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
+ if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now, false)) {
*energy_raw = energy_now;
cpus_read_unlock();
@@ -830,7 +831,8 @@ prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
* 63-------------------------- 31--------------------------- 0
*/
static int rapl_read_data_raw(struct rapl_domain *rd,
- enum rapl_primitives prim, bool xlate, u64 *data)
+ enum rapl_primitives prim, bool xlate, u64 *data,
+ bool atomic)
{
u64 value;
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
@@ -852,7 +854,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
ra.mask = rpi->mask;
- if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
+ if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, atomic)) {
pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
return -EIO;
}
@@ -904,7 +906,7 @@ static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
if (!is_pl_valid(rd, pl))
return -EINVAL;
- return rapl_read_data_raw(rd, prim, xlate, data);
+ return rapl_read_data_raw(rd, prim, xlate, data, false);
}
static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
@@ -941,7 +943,7 @@ static int rapl_check_unit_core(struct rapl_domain *rd)
ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
- if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
+ if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;
@@ -969,7 +971,7 @@ static int rapl_check_unit_atom(struct rapl_domain *rd)
ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
- if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
+ if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;
@@ -1156,7 +1158,7 @@ static int rapl_check_unit_tpmi(struct rapl_domain *rd)
ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
- if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
+ if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;
@@ -1284,6 +1286,9 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_NOVALAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_NOVALAKE_L, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_ARROWLAKE_U, &rapl_defaults_core),
@@ -1325,7 +1330,7 @@ static void rapl_update_domain_data(struct rapl_package *rp)
struct rapl_primitive_info *rpi = get_rpi(rp, prim);
if (!rapl_read_data_raw(&rp->domains[dmn], prim,
- rpi->unit, &val))
+ rpi->unit, &val, false))
rp->domains[dmn].rdd.primitives[prim] = val;
}
}
@@ -1425,7 +1430,7 @@ static int rapl_check_domain(int domain, struct rapl_package *rp)
*/
ra.mask = ENERGY_STATUS_MASK;
- if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value)
+ if (rp->priv->read_raw(get_rid(rp), &ra, false) || !ra.value)
return -ENODEV;
return 0;
@@ -1592,11 +1597,11 @@ static int get_pmu_cpu(struct rapl_package *rp)
if (!rp->has_pmu)
return nr_cpu_ids;
- /* Only TPMI RAPL is supported for now */
- if (rp->priv->type != RAPL_IF_TPMI)
+ /* Only TPMI & MSR RAPL are supported for now */
+ if (rp->priv->type != RAPL_IF_TPMI && rp->priv->type != RAPL_IF_MSR)
return nr_cpu_ids;
- /* TPMI RAPL uses any CPU in the package for PMU */
+ /* TPMI/MSR RAPL uses any CPU in the package for the PMU */
for_each_online_cpu(cpu)
if (topology_physical_package_id(cpu) == rp->id)
return cpu;
@@ -1609,11 +1614,11 @@ static bool is_rp_pmu_cpu(struct rapl_package *rp, int cpu)
if (!rp->has_pmu)
return false;
- /* Only TPMI RAPL is supported for now */
- if (rp->priv->type != RAPL_IF_TPMI)
+ /* Only TPMI & MSR RAPL are supported for now */
+ if (rp->priv->type != RAPL_IF_TPMI && rp->priv->type != RAPL_IF_MSR)
return false;
- /* TPMI RAPL uses any CPU in the package for PMU */
+ /* TPMI/MSR RAPL uses any CPU in the package for the PMU */
return topology_physical_package_id(cpu) == rp->id;
}
@@ -1636,7 +1641,7 @@ static u64 event_read_counter(struct perf_event *event)
if (event->hw.idx < 0)
return 0;
- ret = rapl_read_data_raw(&rp->domains[event->hw.idx], ENERGY_COUNTER, false, &val);
+ ret = rapl_read_data_raw(&rp->domains[event->hw.idx], ENERGY_COUNTER, false, &val, true);
/* Return 0 for failed read */
if (ret)
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 4ed06c71a3ac..0ce1096b6314 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -33,6 +33,8 @@
/* private data for RAPL MSR Interface */
static struct rapl_if_priv *rapl_msr_priv;
+static bool rapl_msr_pmu __ro_after_init;
+
static struct rapl_if_priv rapl_msr_priv_intel = {
.type = RAPL_IF_MSR,
.reg_unit.msr = MSR_RAPL_POWER_UNIT,
@@ -79,6 +81,8 @@ static int rapl_cpu_online(unsigned int cpu)
rp = rapl_add_package_cpuslocked(cpu, rapl_msr_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
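+ /* Register the PMU once for each newly added package. */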
+ if (rapl_msr_pmu)
+ rapl_package_add_pmu(rp);
}
cpumask_set_cpu(cpu, &rp->cpumask);
return 0;
@@ -95,19 +99,37 @@ static int rapl_cpu_down_prep(unsigned int cpu)
cpumask_clear_cpu(cpu, &rp->cpumask);
lead_cpu = cpumask_first(&rp->cpumask);
- if (lead_cpu >= nr_cpu_ids)
+ if (lead_cpu >= nr_cpu_ids) {
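+ /* The last CPU in the package went away, so remove the PMU first. */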
+ if (rapl_msr_pmu)
+ rapl_package_remove_pmu(rp);
rapl_remove_package_cpuslocked(rp);
- else if (rp->lead_cpu == cpu)
+ } else if (rp->lead_cpu == cpu) {
rp->lead_cpu = lead_cpu;
+ }
+
return 0;
}
-static int rapl_msr_read_raw(int cpu, struct reg_action *ra)
+static int rapl_msr_read_raw(int cpu, struct reg_action *ra, bool atomic)
{
+ /*
+ * When called from atomic context (e.g. the PMU event handler),
+ * perform the MSR read directly using rdmsrq().
+ */
+ if (atomic) {
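+ /* rdmsrq() can only access the local CPU's MSRs. */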
+ if (unlikely(smp_processor_id() != cpu))
+ return -EIO;
+
+ rdmsrq(ra->reg.msr, ra->value);
+ goto out;
+ }
+
if (rdmsrq_safe_on_cpu(cpu, ra->reg.msr, &ra->value)) {
pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg.msr, cpu);
return -EIO;
}
+
+out:
ra->value &= ra->mask;
return 0;
}
@@ -151,6 +173,16 @@ static const struct x86_cpu_id pl4_support_ids[] = {
X86_MATCH_VFM(INTEL_ARROWLAKE_U, NULL),
X86_MATCH_VFM(INTEL_ARROWLAKE_H, NULL),
X86_MATCH_VFM(INTEL_PANTHERLAKE_L, NULL),
+ X86_MATCH_VFM(INTEL_WILDCATLAKE_L, NULL),
+ X86_MATCH_VFM(INTEL_NOVALAKE, NULL),
+ X86_MATCH_VFM(INTEL_NOVALAKE_L, NULL),
+ {}
+};
+
+/* CPUs supporting the MSR-based RAPL PMU */
+static const struct x86_cpu_id pmu_support_ids[] = {
+ X86_MATCH_VFM(INTEL_PANTHERLAKE_L, NULL),
+ X86_MATCH_VFM(INTEL_WILDCATLAKE_L, NULL),
{}
};
@@ -181,6 +213,11 @@ static int rapl_msr_probe(struct platform_device *pdev)
pr_info("PL4 support detected.\n");
}
+ if (x86_match_cpu(pmu_support_ids)) {
+ rapl_msr_pmu = true;
+ pr_info("MSR-based RAPL PMU support enabled\n");
+ }
+
rapl_msr_priv->control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
if (IS_ERR(rapl_msr_priv->control_type)) {
pr_debug("failed to register powercap control_type.\n");
diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c
index 82201bf4685d..0a0b85f4528b 100644
--- a/drivers/powercap/intel_rapl_tpmi.c
+++ b/drivers/powercap/intel_rapl_tpmi.c
@@ -60,7 +60,7 @@ static DEFINE_MUTEX(tpmi_rapl_lock);
static struct powercap_control_type *tpmi_control_type;
-static int tpmi_rapl_read_raw(int id, struct reg_action *ra)
+static int tpmi_rapl_read_raw(int id, struct reg_action *ra, bool atomic)
{
if (!ra->reg.mmio)
return -EINVAL;
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index 1c15cac41d80..768b85eecc8f 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1762,6 +1762,7 @@ static int mesh_suspend(struct macio_dev *mdev, pm_message_t mesg)
case PM_EVENT_SUSPEND:
case PM_EVENT_HIBERNATE:
case PM_EVENT_FREEZE:
+ case PM_EVENT_POWEROFF:
break;
default:
return 0;
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index d8ad02c29320..e6357bc301cb 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -1965,6 +1965,7 @@ static int stex_choice_sleep_mic(struct st_hba *hba, pm_message_t state)
case PM_EVENT_SUSPEND:
return ST_S3;
case PM_EVENT_HIBERNATE:
+ case PM_EVENT_POWEROFF:
hba->msi_lock = 0;
return ST_S4;
default:
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
index bde2cc386afd..bf51a17c5be6 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
@@ -19,7 +19,7 @@ static const struct rapl_mmio_regs rapl_mmio_default = {
.limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2),
};
-static int rapl_mmio_read_raw(int cpu, struct reg_action *ra)
+static int rapl_mmio_read_raw(int cpu, struct reg_action *ra, bool atomic)
{
if (!ra->reg.mmio)
return -EINVAL;
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index ea3cab99c5d4..5d6dba681e50 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -1748,6 +1748,7 @@ sl811h_suspend(struct platform_device *dev, pm_message_t state)
break;
case PM_EVENT_SUSPEND:
case PM_EVENT_HIBERNATE:
+ case PM_EVENT_POWEROFF:
case PM_EVENT_PRETHAW: /* explicitly discard hw state */
port_power(sl811, 0);
break;