diff options
Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 181 |
1 files changed, 150 insertions, 31 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index db8c99535e61..64587d318267 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -221,6 +221,7 @@ struct global_params { * @sched_flags: Store scheduler flags for possible cross CPU update * @hwp_boost_min: Last HWP boosted min performance * @suspended: Whether or not the driver has been suspended. + * @pd_registered: Set when a perf domain is registered for this CPU. * @hwp_notify_work: workqueue for HWP notifications. * * This structure stores per CPU instance data for all CPUs. @@ -260,6 +261,9 @@ struct cpudata { unsigned int sched_flags; u32 hwp_boost_min; bool suspended; +#ifdef CONFIG_ENERGY_MODEL + bool pd_registered; +#endif struct delayed_work hwp_notify_work; }; @@ -303,6 +307,7 @@ static bool hwp_is_hybrid; static struct cpufreq_driver *intel_pstate_driver __read_mostly; +#define INTEL_PSTATE_CORE_SCALING 100000 #define HYBRID_SCALING_FACTOR_ADL 78741 #define HYBRID_SCALING_FACTOR_MTL 80000 #define HYBRID_SCALING_FACTOR_LNL 86957 @@ -311,7 +316,7 @@ static int hybrid_scaling_factor; static inline int core_get_scaling(void) { - return 100000; + return INTEL_PSTATE_CORE_SCALING; } #ifdef CONFIG_ACPI @@ -948,12 +953,124 @@ static struct cpudata *hybrid_max_perf_cpu __read_mostly; */ static DEFINE_MUTEX(hybrid_capacity_lock); +#ifdef CONFIG_ENERGY_MODEL +#define HYBRID_EM_STATE_COUNT 4 + +static int hybrid_active_power(struct device *dev, unsigned long *power, + unsigned long *freq) +{ + /* + * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100% + * of the maximum capacity such that two CPUs of the same type will be + * regarded as equally attractive if the utilization of each of them + * falls into the same bin, which should prevent tasks from being + * migrated between them too often. + * + * For this purpose, return the "frequency" of 2 for the first + * performance level and otherwise leave the value set by the caller. + */ + if (!*freq) + *freq = 2; + + /* No power information. */ + *power = EM_MAX_POWER; + + return 0; +} + +static int hybrid_get_cost(struct device *dev, unsigned long freq, + unsigned long *cost) +{ + struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate; + struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(dev->id); + + /* + * The smaller the perf-to-frequency scaling factor, the larger the IPC + * ratio between the given CPU and the least capable CPU in the system. + * Regard that IPC ratio as the primary cost component and assume that + * the scaling factors for different CPU types will differ by at least + * 5% and they will not be above INTEL_PSTATE_CORE_SCALING. + * + * Add the freq value to the cost, so that the cost of running on CPUs + * of the same type in different "utilization bins" is different. + */ + *cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq; + /* + * Increase the cost slightly for CPUs able to access L3 to avoid + * touching it in case some other CPUs of the same type can do the work + * without it. + */ + if (cacheinfo) { + unsigned int i; + + /* Check if L3 cache is there. */ + for (i = 0; i < cacheinfo->num_leaves; i++) { + if (cacheinfo->info_list[i].level == 3) { + *cost += 2; + break; + } + } + } + + return 0; +} + +static bool hybrid_register_perf_domain(unsigned int cpu) +{ + static const struct em_data_callback cb + = EM_ADV_DATA_CB(hybrid_active_power, hybrid_get_cost); + struct cpudata *cpudata = all_cpu_data[cpu]; + struct device *cpu_dev; + + /* + * Registering EM perf domains without enabling asymmetric CPU capacity + * support is not really useful and one domain should not be registered + * more than once. + */ + if (!hybrid_max_perf_cpu || cpudata->pd_registered) + return false; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return false; + + if (em_dev_register_perf_domain(cpu_dev, HYBRID_EM_STATE_COUNT, &cb, + cpumask_of(cpu), false)) + return false; + + cpudata->pd_registered = true; + + return true; +} + +static void hybrid_register_all_perf_domains(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) + hybrid_register_perf_domain(cpu); +} + +static void hybrid_update_perf_domain(struct cpudata *cpu) +{ + if (cpu->pd_registered) + em_adjust_cpu_capacity(cpu->cpu); +} +#else /* !CONFIG_ENERGY_MODEL */ +static inline bool hybrid_register_perf_domain(unsigned int cpu) { return false; } +static inline void hybrid_register_all_perf_domains(void) {} +static inline void hybrid_update_perf_domain(struct cpudata *cpu) {} +#endif /* CONFIG_ENERGY_MODEL */ + static void hybrid_set_cpu_capacity(struct cpudata *cpu) { arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf, cpu->capacity_perf, cpu->pstate.max_pstate_physical); + hybrid_update_perf_domain(cpu); + + topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu)); pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu, cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf, @@ -1042,6 +1159,11 @@ static void hybrid_refresh_cpu_capacity_scaling(void) guard(mutex)(&hybrid_capacity_lock); __hybrid_refresh_cpu_capacity_scaling(); + /* + * Perf domains are not registered before setting hybrid_max_perf_cpu, + * so register them all after setting up CPU capacity scaling. + */ + hybrid_register_all_perf_domains(); } static void hybrid_init_cpu_capacity_scaling(bool refresh) @@ -1069,7 +1191,7 @@ static void hybrid_init_cpu_capacity_scaling(bool refresh) hybrid_refresh_cpu_capacity_scaling(); /* * Disabling ITMT causes sched domains to be rebuilt to disable asym - * packing and enable asym capacity. + * packing and enable asym capacity and EAS. */ sched_clear_itmt_support(); } @@ -1147,6 +1269,14 @@ static void hybrid_update_capacity(struct cpudata *cpu) } hybrid_set_cpu_capacity(cpu); + /* + * If the CPU was offline to start with and it is going online for the + * first time, a perf domain needs to be registered for it if hybrid + * capacity scaling has been enabled already. In that case, sched + * domains need to be rebuilt to take the new perf domain into account. + */ + if (hybrid_register_perf_domain(cpu->cpu)) + em_rebuild_sched_domains(); unlock: mutex_unlock(&hybrid_capacity_lock); @@ -1356,9 +1486,11 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } -static void __intel_pstate_update_max_freq(struct cpudata *cpudata, - struct cpufreq_policy *policy) +static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy, + struct cpudata *cpudata) { + guard(cpufreq_policy_write)(policy); + if (hwp_active) intel_pstate_get_hwp_cap(cpudata); @@ -1368,42 +1500,34 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata, refresh_frequency_limits(policy); } -static void intel_pstate_update_limits(unsigned int cpu) +static bool intel_pstate_update_max_freq(struct cpudata *cpudata) { - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - struct cpudata *cpudata; + struct cpufreq_policy *policy __free(put_cpufreq_policy); + policy = cpufreq_cpu_get(cpudata->cpu); if (!policy) - return; + return false; - cpudata = all_cpu_data[cpu]; + __intel_pstate_update_max_freq(policy, cpudata); - __intel_pstate_update_max_freq(cpudata, policy); + return true; +} - /* Prevent the driver from being unregistered now. */ - mutex_lock(&intel_pstate_driver_lock); +static void intel_pstate_update_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpudata = all_cpu_data[policy->cpu]; - cpufreq_cpu_release(policy); + __intel_pstate_update_max_freq(policy, cpudata); hybrid_update_capacity(cpudata); - - mutex_unlock(&intel_pstate_driver_lock); } static void intel_pstate_update_limits_for_all(void) { int cpu; - for_each_possible_cpu(cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - - if (!policy) - continue; - - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); - - cpufreq_cpu_release(policy); - } + for_each_possible_cpu(cpu) + intel_pstate_update_max_freq(all_cpu_data[cpu]); mutex_lock(&hybrid_capacity_lock); @@ -1843,13 +1967,8 @@ static void intel_pstate_notify_work(struct work_struct *work) { struct cpudata *cpudata = container_of(to_delayed_work(work), struct cpudata, hwp_notify_work); - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu); - - if (policy) { - __intel_pstate_update_max_freq(cpudata, policy); - - cpufreq_cpu_release(policy); + if (intel_pstate_update_max_freq(cpudata)) { /* * The driver will not be unregistered while this function is * running, so update the capacity without acquiring the driver |