diff options
author | Maarten Lankhorst <maarten.lankhorst@linux.intel.com> | 2019-06-26 12:22:54 +0200 |
---|---|---|
committer | Maarten Lankhorst <maarten.lankhorst@linux.intel.com> | 2019-06-26 12:22:54 +0200 |
commit | 355a47ae7ebcf9d605aa809b259d380422e81b8d (patch) | |
tree | f42b3196986c7de89a335b4f57384d249f5663e9 /drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | |
parent | b1622cb3be4557fd086831ca7426eafe5f1acc2e (diff) | |
parent | 80d42db02b3a5beb8cffba08207adf5f4c525ee3 (diff) |
Merge remote-tracking branch 'drm/drm-next' into drm-misc-next-fixes
Some fixes have been accidentally pushed to this, so I cannot fost-forward.
Required to pull in the remove-fbcon-notifiers fixes.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 284 |
1 files changed, 272 insertions, 12 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 95144e49c7f9..194d0c75b072 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -22,7 +22,9 @@ * Authors: Rafał Miłecki <zajec5@gmail.com> * Alex Deucher <alexdeucher@gmail.com> */ -#include <drm/drmP.h> + +#include <drm/drm_debugfs.h> + #include "amdgpu.h" #include "amdgpu_drv.h" #include "amdgpu_pm.h" @@ -31,6 +33,7 @@ #include "amdgpu_smu.h" #include "atom.h" #include <linux/power_supply.h> +#include <linux/pci.h> #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/nospec.h> @@ -67,6 +70,15 @@ static const struct cg_flag_name clocks[] = { {0, NULL}, }; +static const struct hwmon_temp_label { + enum PP_HWMON_TEMP channel; + const char *label; +} temp_label[] = { + {PP_TEMP_EDGE, "edge"}, + {PP_TEMP_JUNCTION, "junction"}, + {PP_TEMP_MEM, "mem"}, +}; + void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) { if (adev->pm.dpm_enabled) { @@ -342,6 +354,16 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, if (current_level == level) return count; + /* profile_exit setting is valid only when current mode is in profile mode */ + if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) && + (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) { + pr_err("Currently not in any profile mode!\n"); + return -EINVAL; + } + if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { @@ -748,7 +770,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); - if (adev->powerplay.pp_funcs->set_ppfeature_status) { + if (is_support_sw_smu(adev)) { + ret = smu_set_ppfeature_status(&adev->smu, featuremask); + if (ret) + return -EINVAL; + } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); if (ret) return -EINVAL; @@ -764,7 +790,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->get_ppfeature_status) + if (is_support_sw_smu(adev)) { + return smu_get_ppfeature_status(&adev->smu, buf); + } else if (adev->powerplay.pp_funcs->get_ppfeature_status) return amdgpu_dpm_get_ppfeature_status(adev, buf); return snprintf(buf, PAGE_SIZE, "\n"); @@ -1293,6 +1321,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, } /** + * DOC: mem_busy_percent + * + * The amdgpu driver provides a sysfs API for reading how busy the VRAM + * is as a percentage. The file mem_busy_percent is used for this. + * The SMU firmware computes a percentage of load based on the + * aggregate activity level in the IP cores. + */ +static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int r, value, size = sizeof(value); + + /* read the IP busy sensor */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, + (void *)&value, &size); + + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", value); +} + +/** * DOC: pcie_bw * * The amdgpu driver provides a sysfs API for estimating how much data @@ -1317,6 +1371,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, count0, count1, pcie_get_mps(adev->pdev)); } +/** + * DOC: unique_id + * + * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU + * The file unique_id is used for this. + * This will provide a Unique ID that will persist from machine to machine + * + * NOTE: This will only work for GFX9 and newer. This file will be absent + * on unsupported ASICs (GFX8 and older) + */ +static ssize_t amdgpu_get_unique_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->unique_id) + return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); + + return 0; +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -1361,10 +1438,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, amdgpu_set_pp_od_clk_voltage); static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, amdgpu_get_busy_percent, NULL); +static DEVICE_ATTR(mem_busy_percent, S_IRUGO, + amdgpu_get_memory_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, amdgpu_get_ppfeature_status, amdgpu_set_ppfeature_status); +static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -1372,6 +1452,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; + int channel = to_sensor_dev_attr(attr)->index; int r, temp, size = sizeof(temp); /* Can't get temperature when the card is off */ @@ -1379,11 +1460,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* get the temperature */ - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, - (void *)&temp, &size); - if (r) - return r; + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + /* get current junction temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_EDGE: + /* get current edge temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_MEM: + /* get current memory temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + } return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1404,6 +1506,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_hotspot_temp; + else + temp = adev->pm.dpm.thermal.max_hotspot_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_mem_temp; + else + temp = adev->pm.dpm.thermal.max_mem_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int channel = to_sensor_dev_attr(attr)->index; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label); +} + +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int channel = to_sensor_dev_attr(attr)->index; + int temp = 0; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp; + break; + case PP_TEMP_EDGE: + temp = adev->pm.dpm.thermal.max_edge_emergency_temp; + break; + case PP_TEMP_MEM: + temp = adev->pm.dpm.thermal.max_mem_emergency_temp; + break; + } + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, struct device_attribute *attr, char *buf) @@ -1973,11 +2145,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * hwmon interfaces for GPU temperature: * - * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius + * - temp2_input and temp3_input are supported on SOC15 dGPUs only + * + * - temp[1-3]_label: temperature channel label + * - temp2_label and temp3_label are supported on SOC15 dGPUs only * - * - temp1_crit: temperature critical max value in millidegrees Celsius + * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius + * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only * - * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only + * + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius + * - these are supported on SOC15 dGPUs only * * hwmon interfaces for GPU voltage: * @@ -2025,9 +2206,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * */ -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -2054,6 +2247,18 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_crit.dev_attr.attr, &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_crit.dev_attr.attr, + &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_crit.dev_attr.attr, + &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp1_emergency.dev_attr.attr, + &sensor_dev_attr_temp2_emergency.dev_attr.attr, + &sensor_dev_attr_temp3_emergency.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -2176,6 +2381,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_freq2_label.dev_attr.attr)) return 0; + /* only SOC15 dGPUs support hotspot and mem temperatures */ + if (((adev->flags & AMD_IS_APU) || + adev->asic_type < CHIP_VEGA10) && + (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_input.dev_attr.attr || + attr == &sensor_dev_attr_temp3_input.dev_attr.attr || + attr == &sensor_dev_attr_temp2_label.dev_attr.attr || + attr == &sensor_dev_attr_temp3_label.dev_attr.attr)) + return 0; + return effective_mode; } @@ -2480,6 +2701,21 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev) } +int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version) +{ + int r; + + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) { + r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle); + if (r) { + pr_err("smu firmware loading failed\n"); + return r; + } + *smu_version = adev->pm.fw_version; + } + return 0; +} + int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; @@ -2602,6 +2838,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "gpu_busy_level\n"); return ret; } + /* APU does not have its own dedicated memory */ + if (!(adev->flags & AMD_IS_APU)) { + ret = device_create_file(adev->dev, + &dev_attr_mem_busy_percent); + if (ret) { + DRM_ERROR("failed to create device file " + "mem_busy_percent\n"); + return ret; + } + } /* PCIe Perf counters won't work on APU nodes */ if (!(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, &dev_attr_pcie_bw); @@ -2610,6 +2856,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } + if (adev->unique_id) + ret = device_create_file(adev->dev, &dev_attr_unique_id); + if (ret) { + DRM_ERROR("failed to create device file unique_id\n"); + return ret; + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -2668,7 +2920,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); if (!(adev->flags & AMD_IS_APU)) + device_remove_file(adev->dev, &dev_attr_mem_busy_percent); + if (!(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_pcie_bw); + if (adev->unique_id) + device_remove_file(adev->dev, &dev_attr_unique_id); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_ppfeatures); @@ -2765,6 +3021,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* GPU Load */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size)) seq_printf(m, "GPU Load: %u %%\n", value); + /* MEM Load */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size)) + seq_printf(m, "MEM Load: %u %%\n", value); + seq_printf(m, "\n"); /* SMC feature mask */ |