summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_survivability_mode.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_survivability_mode.c')
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c69
1 files changed, 50 insertions, 19 deletions
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index cb813b337fd3..1f710b3fc599 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/sysfs.h>
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_heci_gsc.h"
@@ -28,20 +29,32 @@
* This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware
* to be flashed through mei and collect telemetry. The driver's probe flow is modified
* such that it enters survivability mode when pcode initialization is incomplete and boot status
- * denotes a failure. The driver then populates the survivability_mode PCI sysfs indicating
- * survivability mode and provides additional information required for debug
+ * denotes a failure.
*
- * KMD exposes below admin-only readable sysfs in survivability mode
+ * Survivability mode can also be entered manually using the survivability mode attribute available
+ * through configfs which is beneficial in several usecases. It can be used to address scenarios
+ * where pcode does not detect failure or for validation purposes. It can also be used in
+ * In-Field-Repair (IFR) to repair a single card without impacting the other cards in a node.
*
- * device/survivability_mode: The presence of this file indicates that the card is in survivability
- * mode. Also, provides additional information on why the driver entered
- * survivability mode.
+ * Use below command enable survivability mode manually::
*
- * Capability Information - Provides boot status
- * Postcode Information - Provides information about the failure
- * Overflow Information - Provides history of previous failures
- * Auxiliary Information - Certain failures may have information in
- * addition to postcode information
+ * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
+ *
+ * Refer :ref:`xe_configfs` for more details on how to use configfs
+ *
+ * Survivability mode is indicated by the below admin-only readable sysfs which provides additional
+ * debug information::
+ *
+ * /sys/bus/pci/devices/<device>/surivability_mode
+ *
+ * Capability Information:
+ * Provides boot status
+ * Postcode Information:
+ * Provides information about the failure
+ * Overflow Information
+ * Provides history of previous failures
+ * Auxiliary Information
+ * Certain failures may have information in addition to postcode information
*/
static u32 aux_history_offset(u32 reg_value)
@@ -133,6 +146,7 @@ static void xe_survivability_mode_fini(void *arg)
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;
+ xe_configfs_clear_survivability_mode(pdev);
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
}
@@ -186,24 +200,41 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe)
return xe->survivability.mode;
}
-/*
- * survivability_mode_requested - check if it's possible to enable
- * survivability mode and that was requested by firmware
+/**
+ * xe_survivability_mode_is_requested - check if it's possible to enable survivability
+ * mode that was requested by firmware or userspace
+ * @xe: xe device instance
*
- * This function reads the boot status from Pcode.
+ * This function reads configfs and boot status from Pcode.
*
* Return: true if platform support is available and boot status indicates
- * failure, false otherwise.
+ * failure or if survivability mode is requested, false otherwise.
*/
-static bool survivability_mode_requested(struct xe_device *xe)
+bool xe_survivability_mode_is_requested(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
u32 data;
+ bool survivability_mode;
- if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe))
+ if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
return false;
+ survivability_mode = xe_configfs_get_survivability_mode(pdev);
+
+ if (xe->info.platform < XE_BATTLEMAGE) {
+ if (survivability_mode) {
+ dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n");
+ xe_configfs_clear_survivability_mode(pdev);
+ }
+ return false;
+ }
+
+ /* Enable survivability mode if set via configfs */
+ if (survivability_mode)
+ return true;
+
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
@@ -226,7 +257,7 @@ int xe_survivability_mode_enable(struct xe_device *xe)
struct xe_survivability_info *info;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- if (!survivability_mode_requested(xe))
+ if (!xe_survivability_mode_is_requested(xe))
return 0;
survivability->size = MAX_SCRATCH_MMIO;