summaryrefslogtreecommitdiff
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/Kconfig1
-rw-r--r--drivers/acpi/apei/apei-internal.h2
-rw-r--r--drivers/acpi/apei/einj-core.c531
-rw-r--r--drivers/acpi/apei/einj-cxl.c2
-rw-r--r--drivers/acpi/apei/erst-dbg.c8
-rw-r--r--drivers/acpi/apei/ghes.c163
6 files changed, 520 insertions, 187 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 3cfe7e7475f2..070c07d68dfb 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -23,6 +23,7 @@ config ACPI_APEI_GHES
select ACPI_HED
select IRQ_WORK
select GENERIC_ALLOCATOR
+ select ARM_SDE_INTERFACE if ARM64
help
Generic Hardware Error Source provides a way to report
platform hardware errors (such as that from chipset). It
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index cd2766c69d78..77c10a7a7a9f 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -131,7 +131,7 @@ static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
int apei_osc_setup(void);
-int einj_get_available_error_type(u32 *type);
+int einj_get_available_error_type(u32 *type, int einj_action);
int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
u64 param4);
int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
index 04731a5b01fa..305c240a303f 100644
--- a/drivers/acpi/apei/einj-core.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -21,7 +21,7 @@
#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/mm.h>
-#include <linux/platform_device.h>
+#include <linux/device/faux.h>
#include <linux/unaligned.h>
#include "apei-internal.h"
@@ -33,6 +33,8 @@
#define SLEEP_UNIT_MAX 5000 /* 5ms */
/* Firmware should respond within 1 seconds */
#define FIRMWARE_TIMEOUT (1 * USEC_PER_SEC)
+#define COMPONENT_LEN 16
+#define ACPI65_EINJV2_SUPP BIT(30)
#define ACPI5_VENDOR_BIT BIT(31)
#define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \
ACPI_EINJ_MEMORY_UNCORRECTABLE | \
@@ -49,6 +51,28 @@
*/
static int acpi5;
+struct syndrome_array {
+ union {
+ u8 acpi_id[COMPONENT_LEN];
+ u8 device_id[COMPONENT_LEN];
+ u8 pcie_sbdf[COMPONENT_LEN];
+ u8 vendor_id[COMPONENT_LEN];
+ } comp_id;
+ union {
+ u8 proc_synd[COMPONENT_LEN];
+ u8 mem_synd[COMPONENT_LEN];
+ u8 pcie_synd[COMPONENT_LEN];
+ u8 vendor_synd[COMPONENT_LEN];
+ } comp_synd;
+};
+
+struct einjv2_extension_struct {
+ u32 length;
+ u16 revision;
+ u16 component_arr_count;
+ struct syndrome_array component_arr[] __counted_by(component_arr_count);
+};
+
struct set_error_type_with_address {
u32 type;
u32 vendor_extension;
@@ -57,11 +81,13 @@ struct set_error_type_with_address {
u64 memory_address;
u64 memory_address_range;
u32 pcie_sbdf;
+ struct einjv2_extension_struct einjv2_struct;
};
enum {
SETWA_FLAGS_APICID = 1,
SETWA_FLAGS_MEM = 2,
SETWA_FLAGS_PCIE_SBDF = 4,
+ SETWA_FLAGS_EINJV2 = 8,
};
/*
@@ -83,6 +109,11 @@ static struct debugfs_blob_wrapper vendor_blob;
static struct debugfs_blob_wrapper vendor_errors;
static char vendor_dev[64];
+static u32 max_nr_components;
+static u32 available_error_type;
+static u32 available_error_type_v2;
+static struct syndrome_array *syndrome_data;
+
/*
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
* EINJ table through an unpublished extension. Use with caution as
@@ -149,7 +180,10 @@ static DEFINE_MUTEX(einj_mutex);
*/
bool einj_initialized __ro_after_init;
-static void *einj_param;
+static void __iomem *einj_param;
+static u32 v5param_size;
+static u32 v66param_size;
+static bool is_v2;
static void einj_exec_ctx_init(struct apei_exec_context *ctx)
{
@@ -157,13 +191,13 @@ static void einj_exec_ctx_init(struct apei_exec_context *ctx)
EINJ_TAB_ENTRY(einj_tab), einj_tab->entries);
}
-static int __einj_get_available_error_type(u32 *type)
+static int __einj_get_available_error_type(u32 *type, int einj_action)
{
struct apei_exec_context ctx;
int rc;
einj_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE);
+ rc = apei_exec_run(&ctx, einj_action);
if (rc)
return rc;
*type = apei_exec_ctx_get_output(&ctx);
@@ -172,17 +206,34 @@ static int __einj_get_available_error_type(u32 *type)
}
/* Get error injection capabilities of the platform */
-int einj_get_available_error_type(u32 *type)
+int einj_get_available_error_type(u32 *type, int einj_action)
{
int rc;
mutex_lock(&einj_mutex);
- rc = __einj_get_available_error_type(type);
+ rc = __einj_get_available_error_type(type, einj_action);
mutex_unlock(&einj_mutex);
return rc;
}
+static int einj_get_available_error_types(u32 *type1, u32 *type2)
+{
+ int rc;
+
+ rc = einj_get_available_error_type(type1, ACPI_EINJ_GET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ if (*type1 & ACPI65_EINJV2_SUPP) {
+ rc = einj_get_available_error_type(type2,
+ ACPI_EINJV2_GET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
static int einj_timedout(u64 *t)
{
if ((s64)*t < SLEEP_UNIT_MIN) {
@@ -214,24 +265,44 @@ static void check_vendor_extension(u64 paddr,
struct set_error_type_with_address *v5param)
{
int offset = v5param->vendor_extension;
- struct vendor_error_type_extension *v;
+ struct vendor_error_type_extension v;
+ struct vendor_error_type_extension __iomem *p;
u32 sbdf;
if (!offset)
return;
- v = acpi_os_map_iomem(paddr + offset, sizeof(*v));
- if (!v)
+ p = acpi_os_map_iomem(paddr + offset, sizeof(*p));
+ if (!p)
return;
- get_oem_vendor_struct(paddr, offset, v);
- sbdf = v->pcie_sbdf;
+ memcpy_fromio(&v, p, sizeof(v));
+ get_oem_vendor_struct(paddr, offset, &v);
+ sbdf = v.pcie_sbdf;
sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
sbdf >> 24, (sbdf >> 16) & 0xff,
(sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7,
- v->vendor_id, v->device_id, v->rev_id);
- acpi_os_unmap_iomem(v, sizeof(*v));
+ v.vendor_id, v.device_id, v.rev_id);
+ acpi_os_unmap_iomem(p, sizeof(v));
}
-static void *einj_get_parameter_address(void)
+static u32 einjv2_init(struct einjv2_extension_struct *e)
+{
+ if (e->revision != 1) {
+ pr_info("Unknown v2 extension revision %u\n", e->revision);
+ return 0;
+ }
+ if (e->length < sizeof(*e) || e->length > PAGE_SIZE) {
+ pr_info(FW_BUG "Bad1 v2 extension length %u\n", e->length);
+ return 0;
+ }
+ if ((e->length - sizeof(*e)) % sizeof(e->component_arr[0])) {
+ pr_info(FW_BUG "Bad2 v2 extension length %u\n", e->length);
+ return 0;
+ }
+
+ return (e->length - sizeof(*e)) / sizeof(e->component_arr[0]);
+}
+
+static void __iomem *einj_get_parameter_address(void)
{
int i;
u64 pa_v4 = 0, pa_v5 = 0;
@@ -252,26 +323,43 @@ static void *einj_get_parameter_address(void)
entry++;
}
if (pa_v5) {
- struct set_error_type_with_address *v5param;
+ struct set_error_type_with_address v5param;
+ struct set_error_type_with_address __iomem *p;
- v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
- if (v5param) {
+ v5param_size = sizeof(v5param);
+ p = acpi_os_map_iomem(pa_v5, sizeof(*p));
+ if (p) {
+ memcpy_fromio(&v5param, p, v5param_size);
acpi5 = 1;
- check_vendor_extension(pa_v5, v5param);
- return v5param;
+ check_vendor_extension(pa_v5, &v5param);
+ if (available_error_type & ACPI65_EINJV2_SUPP) {
+ struct einjv2_extension_struct *e;
+
+ e = &v5param.einjv2_struct;
+ max_nr_components = einjv2_init(e);
+
+ /* remap including einjv2_extension_struct */
+ acpi_os_unmap_iomem(p, v5param_size);
+ v66param_size = v5param_size - sizeof(*e) + e->length;
+ p = acpi_os_map_iomem(pa_v5, v66param_size);
+ }
+
+ return p;
}
}
if (param_extension && pa_v4) {
- struct einj_parameter *v4param;
+ struct einj_parameter v4param;
+ struct einj_parameter __iomem *p;
- v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param));
- if (!v4param)
+ p = acpi_os_map_iomem(pa_v4, sizeof(*p));
+ if (!p)
return NULL;
- if (v4param->reserved1 || v4param->reserved2) {
- acpi_os_unmap_iomem(v4param, sizeof(*v4param));
+ memcpy_fromio(&v4param, p, sizeof(v4param));
+ if (v4param.reserved1 || v4param.reserved2) {
+ acpi_os_unmap_iomem(p, sizeof(v4param));
return NULL;
}
- return v4param;
+ return p;
}
return NULL;
@@ -317,7 +405,8 @@ static struct acpi_generic_address *einj_get_trigger_parameter_region(
static int __einj_error_trigger(u64 trigger_paddr, u32 type,
u64 param1, u64 param2)
{
- struct acpi_einj_trigger *trigger_tab = NULL;
+ struct acpi_einj_trigger trigger_tab;
+ struct acpi_einj_trigger *full_trigger_tab;
struct apei_exec_context trigger_ctx;
struct apei_resources trigger_resources;
struct acpi_whea_header *trigger_entry;
@@ -325,54 +414,60 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
u32 table_size;
int rc = -EIO;
struct acpi_generic_address *trigger_param_region = NULL;
+ struct acpi_einj_trigger __iomem *p = NULL;
- r = request_mem_region(trigger_paddr, sizeof(*trigger_tab),
+ r = request_mem_region(trigger_paddr, sizeof(trigger_tab),
"APEI EINJ Trigger Table");
if (!r) {
pr_err("Can not request [mem %#010llx-%#010llx] for Trigger table\n",
(unsigned long long)trigger_paddr,
(unsigned long long)trigger_paddr +
- sizeof(*trigger_tab) - 1);
+ sizeof(trigger_tab) - 1);
goto out;
}
- trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
- if (!trigger_tab) {
+ p = ioremap_cache(trigger_paddr, sizeof(*p));
+ if (!p) {
pr_err("Failed to map trigger table!\n");
goto out_rel_header;
}
- rc = einj_check_trigger_header(trigger_tab);
+ memcpy_fromio(&trigger_tab, p, sizeof(trigger_tab));
+ rc = einj_check_trigger_header(&trigger_tab);
if (rc) {
pr_warn(FW_BUG "Invalid trigger error action table.\n");
goto out_rel_header;
}
/* No action structures in the TRIGGER_ERROR table, nothing to do */
- if (!trigger_tab->entry_count)
+ if (!trigger_tab.entry_count)
goto out_rel_header;
rc = -EIO;
- table_size = trigger_tab->table_size;
- r = request_mem_region(trigger_paddr + sizeof(*trigger_tab),
- table_size - sizeof(*trigger_tab),
+ table_size = trigger_tab.table_size;
+ full_trigger_tab = kmalloc(table_size, GFP_KERNEL);
+ if (!full_trigger_tab)
+ goto out_rel_header;
+ r = request_mem_region(trigger_paddr + sizeof(trigger_tab),
+ table_size - sizeof(trigger_tab),
"APEI EINJ Trigger Table");
if (!r) {
pr_err("Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n",
- (unsigned long long)trigger_paddr + sizeof(*trigger_tab),
+ (unsigned long long)trigger_paddr + sizeof(trigger_tab),
(unsigned long long)trigger_paddr + table_size - 1);
- goto out_rel_header;
+ goto out_free_trigger_tab;
}
- iounmap(trigger_tab);
- trigger_tab = ioremap_cache(trigger_paddr, table_size);
- if (!trigger_tab) {
+ iounmap(p);
+ p = ioremap_cache(trigger_paddr, table_size);
+ if (!p) {
pr_err("Failed to map trigger table!\n");
goto out_rel_entry;
}
+ memcpy_fromio(full_trigger_tab, p, table_size);
trigger_entry = (struct acpi_whea_header *)
- ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+ ((char *)full_trigger_tab + sizeof(struct acpi_einj_trigger));
apei_resources_init(&trigger_resources);
apei_exec_ctx_init(&trigger_ctx, einj_ins_type,
ARRAY_SIZE(einj_ins_type),
- trigger_entry, trigger_tab->entry_count);
+ trigger_entry, trigger_tab.entry_count);
rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources);
if (rc)
goto out_fini;
@@ -390,7 +485,7 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
apei_resources_init(&addr_resources);
trigger_param_region = einj_get_trigger_parameter_region(
- trigger_tab, param1, param2);
+ full_trigger_tab, param1, param2);
if (trigger_param_region) {
rc = apei_resources_add(&addr_resources,
trigger_param_region->address,
@@ -419,23 +514,34 @@ out_release:
out_fini:
apei_resources_fini(&trigger_resources);
out_rel_entry:
- release_mem_region(trigger_paddr + sizeof(*trigger_tab),
- table_size - sizeof(*trigger_tab));
+ release_mem_region(trigger_paddr + sizeof(trigger_tab),
+ table_size - sizeof(trigger_tab));
+out_free_trigger_tab:
+ kfree(full_trigger_tab);
out_rel_header:
- release_mem_region(trigger_paddr, sizeof(*trigger_tab));
+ release_mem_region(trigger_paddr, sizeof(trigger_tab));
out:
- if (trigger_tab)
- iounmap(trigger_tab);
+ if (p)
+ iounmap(p);
return rc;
}
+static bool is_end_of_list(u8 *val)
+{
+ for (int i = 0; i < COMPONENT_LEN; ++i) {
+ if (val[i] != 0xFF)
+ return false;
+ }
+ return true;
+}
static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
u64 param3, u64 param4)
{
struct apei_exec_context ctx;
+ u32 param_size = is_v2 ? v66param_size : v5param_size;
u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
- int rc;
+ int i, rc;
einj_exec_ctx_init(&ctx);
@@ -444,8 +550,13 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
return rc;
apei_exec_ctx_set_input(&ctx, type);
if (acpi5) {
- struct set_error_type_with_address *v5param = einj_param;
+ struct set_error_type_with_address *v5param;
+
+ v5param = kmalloc(param_size, GFP_KERNEL);
+ if (!v5param)
+ return -ENOMEM;
+ memcpy_fromio(v5param, einj_param, param_size);
v5param->type = type;
if (type & ACPI5_VENDOR_BIT) {
switch (vendor_flags) {
@@ -465,8 +576,21 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
v5param->flags = flags;
v5param->memory_address = param1;
v5param->memory_address_range = param2;
- v5param->apicid = param3;
- v5param->pcie_sbdf = param4;
+
+ if (is_v2) {
+ for (i = 0; i < max_nr_components; i++) {
+ if (is_end_of_list(syndrome_data[i].comp_id.acpi_id))
+ break;
+ v5param->einjv2_struct.component_arr[i].comp_id =
+ syndrome_data[i].comp_id;
+ v5param->einjv2_struct.component_arr[i].comp_synd =
+ syndrome_data[i].comp_synd;
+ }
+ v5param->einjv2_struct.component_arr_count = i;
+ } else {
+ v5param->apicid = param3;
+ v5param->pcie_sbdf = param4;
+ }
} else {
switch (type) {
case ACPI_EINJ_PROCESSOR_CORRECTABLE:
@@ -490,15 +614,19 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
break;
}
}
+ memcpy_toio(einj_param, v5param, param_size);
+ kfree(v5param);
} else {
rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
if (rc)
return rc;
if (einj_param) {
- struct einj_parameter *v4param = einj_param;
+ struct einj_parameter v4param;
- v4param->param1 = param1;
- v4param->param2 = param2;
+ memcpy_fromio(&v4param, einj_param, sizeof(v4param));
+ v4param.param1 = param1;
+ v4param.param2 = param2;
+ memcpy_toio(einj_param, &v4param, sizeof(v4param));
}
}
rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
@@ -541,6 +669,43 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
return rc;
}
+/* Allow almost all types of address except MMIO. */
+static bool is_allowed_range(u64 base_addr, u64 size)
+{
+ int i;
+ /*
+ * MMIO region is usually claimed with IORESOURCE_MEM + IORES_DESC_NONE.
+ * However, IORES_DESC_NONE is treated like a wildcard when we check if
+ * region intersects with known resource. So do an allow list check for
+ * IORES_DESCs that definitely or most likely not MMIO.
+ */
+ int non_mmio_desc[] = {
+ IORES_DESC_CRASH_KERNEL,
+ IORES_DESC_ACPI_TABLES,
+ IORES_DESC_ACPI_NV_STORAGE,
+ IORES_DESC_PERSISTENT_MEMORY,
+ IORES_DESC_PERSISTENT_MEMORY_LEGACY,
+ /* Treat IORES_DESC_DEVICE_PRIVATE_MEMORY as MMIO. */
+ IORES_DESC_RESERVED,
+ IORES_DESC_SOFT_RESERVED,
+ };
+
+ if (region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+ == REGION_INTERSECTS)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(non_mmio_desc); ++i) {
+ if (region_intersects(base_addr, size, IORESOURCE_MEM, non_mmio_desc[i])
+ == REGION_INTERSECTS)
+ return true;
+ }
+
+ if (arch_is_platform_page(base_addr))
+ return true;
+
+ return false;
+}
+
/* Inject the specified hardware error */
int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
u64 param4)
@@ -549,10 +714,15 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
u64 base_addr, size;
/* If user manually set "flags", make sure it is legal */
- if (flags && (flags &
- ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+ if (flags && (flags & ~(SETWA_FLAGS_APICID | SETWA_FLAGS_MEM |
+ SETWA_FLAGS_PCIE_SBDF | SETWA_FLAGS_EINJV2)))
return -EINVAL;
+ /* check if type is a valid EINJv2 error type */
+ if (is_v2) {
+ if (!(type & available_error_type_v2))
+ return -EINVAL;
+ }
/*
* We need extra sanity checks for memory errors.
* Other types leap directly to injection.
@@ -582,19 +752,15 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
* Disallow crazy address masks that give BIOS leeway to pick
* injection address almost anywhere. Insist on page or
* better granularity and that target address is normal RAM or
- * NVDIMM.
+ * as long as is not MMIO.
*/
base_addr = param1 & param2;
size = ~param2 + 1;
- if (((param2 & PAGE_MASK) != PAGE_MASK) ||
- ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
- != REGION_INTERSECTS) &&
- (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
- != REGION_INTERSECTS) &&
- (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED)
- != REGION_INTERSECTS) &&
- !arch_is_platform_page(base_addr)))
+ if ((param2 & PAGE_MASK) != PAGE_MASK)
+ return -EINVAL;
+
+ if (!is_allowed_range(base_addr, size))
return -EINVAL;
if (is_zero_pfn(base_addr >> PAGE_SHIFT))
@@ -630,6 +796,8 @@ static u64 error_param2;
static u64 error_param3;
static u64 error_param4;
static struct dentry *einj_debug_dir;
+static char einj_buf[32];
+static bool einj_v2_enabled;
static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
{ BIT(0), "Processor Correctable" },
{ BIT(1), "Processor Uncorrectable non-fatal" },
@@ -646,29 +814,35 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
{ BIT(31), "Vendor Defined Error Types" },
};
+static struct { u32 mask; const char *str; } const einjv2_error_type_string[] = {
+ { BIT(0), "EINJV2 Processor Error" },
+ { BIT(1), "EINJV2 Memory Error" },
+ { BIT(2), "EINJV2 PCI Express Error" },
+};
+
static int available_error_type_show(struct seq_file *m, void *v)
{
- int rc;
- u32 error_type = 0;
- rc = einj_get_available_error_type(&error_type);
- if (rc)
- return rc;
for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++)
- if (error_type & einj_error_type_string[pos].mask)
+ if (available_error_type & einj_error_type_string[pos].mask)
seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
einj_error_type_string[pos].str);
-
+ if ((available_error_type & ACPI65_EINJV2_SUPP) && einj_v2_enabled) {
+ for (int pos = 0; pos < ARRAY_SIZE(einjv2_error_type_string); pos++) {
+ if (available_error_type_v2 & einjv2_error_type_string[pos].mask)
+ seq_printf(m, "V2_0x%08x\t%s\n", einjv2_error_type_string[pos].mask,
+ einjv2_error_type_string[pos].str);
+ }
+ }
return 0;
}
DEFINE_SHOW_ATTRIBUTE(available_error_type);
-static int error_type_get(void *data, u64 *val)
+static ssize_t error_type_get(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
{
- *val = error_type;
-
- return 0;
+ return simple_read_from_buffer(buf, count, ppos, einj_buf, strlen(einj_buf));
}
bool einj_is_cxl_error_type(u64 type)
@@ -678,8 +852,7 @@ bool einj_is_cxl_error_type(u64 type)
int einj_validate_error_type(u64 type)
{
- u32 tval, vendor, available_error_type = 0;
- int rc;
+ u32 tval, vendor;
/* Only low 32 bits for error type are valid */
if (type & GENMASK_ULL(63, 32))
@@ -695,20 +868,36 @@ int einj_validate_error_type(u64 type)
/* Only one error type can be specified */
if (tval & (tval - 1))
return -EINVAL;
- if (!vendor) {
- rc = einj_get_available_error_type(&available_error_type);
- if (rc)
- return rc;
- if (!(type & available_error_type))
+ if (!vendor)
+ if (!(type & (available_error_type | available_error_type_v2)))
return -EINVAL;
- }
return 0;
}
-static int error_type_set(void *data, u64 val)
+static ssize_t error_type_set(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
{
int rc;
+ u64 val;
+
+ /* Leave the last character for the NUL terminator */
+ if (count > sizeof(einj_buf) - 1)
+ return -EINVAL;
+
+ memset(einj_buf, 0, sizeof(einj_buf));
+ if (copy_from_user(einj_buf, buf, count))
+ return -EFAULT;
+
+ if (strncmp(einj_buf, "V2_", 3) == 0) {
+ if (!sscanf(einj_buf, "V2_%llx", &val))
+ return -EINVAL;
+ is_v2 = true;
+ } else {
+ if (!sscanf(einj_buf, "%llx", &val))
+ return -EINVAL;
+ is_v2 = false;
+ }
rc = einj_validate_error_type(val);
if (rc)
@@ -716,17 +905,24 @@ static int error_type_set(void *data, u64 val)
error_type = val;
- return 0;
+ return count;
}
-DEFINE_DEBUGFS_ATTRIBUTE(error_type_fops, error_type_get, error_type_set,
- "0x%llx\n");
+static const struct file_operations error_type_fops = {
+ .read = error_type_get,
+ .write = error_type_set,
+};
static int error_inject_set(void *data, u64 val)
{
if (!error_type)
return -EINVAL;
+ if (is_v2)
+ error_flags |= SETWA_FLAGS_EINJV2;
+ else
+ error_flags &= ~SETWA_FLAGS_EINJV2;
+
return einj_error_inject(error_type, error_flags, error_param1, error_param2,
error_param3, error_param4);
}
@@ -749,17 +945,104 @@ static int einj_check_table(struct acpi_table_einj *einj_tab)
return 0;
}
-static int __init einj_probe(struct platform_device *pdev)
+static ssize_t u128_read(struct file *f, char __user *buf, size_t count, loff_t *off)
+{
+ char output[2 * COMPONENT_LEN + 1];
+ u8 *data = f->f_inode->i_private;
+ int i;
+
+ if (*off >= sizeof(output))
+ return 0;
+
+ for (i = 0; i < COMPONENT_LEN; i++)
+ sprintf(output + 2 * i, "%.02x", data[COMPONENT_LEN - i - 1]);
+ output[2 * COMPONENT_LEN] = '\n';
+
+ return simple_read_from_buffer(buf, count, off, output, sizeof(output));
+}
+
+static ssize_t u128_write(struct file *f, const char __user *buf, size_t count, loff_t *off)
+{
+ char input[2 + 2 * COMPONENT_LEN + 2];
+ u8 *save = f->f_inode->i_private;
+ u8 tmp[COMPONENT_LEN];
+ char byte[3] = {};
+ char *s, *e;
+ ssize_t c;
+ long val;
+ int i;
+
+ /* Require that user supply whole input line in one write(2) syscall */
+ if (*off)
+ return -EINVAL;
+
+ c = simple_write_to_buffer(input, sizeof(input), off, buf, count);
+ if (c < 0)
+ return c;
+
+ if (c < 1 || input[c - 1] != '\n')
+ return -EINVAL;
+
+ /* Empty line means invalidate this entry */
+ if (c == 1) {
+ memset(save, 0xff, COMPONENT_LEN);
+ return c;
+ }
+
+ if (input[0] == '0' && (input[1] == 'x' || input[1] == 'X'))
+ s = input + 2;
+ else
+ s = input;
+ e = input + c - 1;
+
+ for (i = 0; i < COMPONENT_LEN; i++) {
+ byte[1] = *--e;
+ byte[0] = e > s ? *--e : '0';
+ if (kstrtol(byte, 16, &val))
+ return -EINVAL;
+ tmp[i] = val;
+ if (e <= s)
+ break;
+ }
+ while (++i < COMPONENT_LEN)
+ tmp[i] = 0;
+
+ memcpy(save, tmp, COMPONENT_LEN);
+
+ return c;
+}
+
+static const struct file_operations u128_fops = {
+ .read = u128_read,
+ .write = u128_write,
+};
+
+static bool setup_einjv2_component_files(void)
+{
+ char name[32];
+
+ syndrome_data = kcalloc(max_nr_components, sizeof(syndrome_data[0]), GFP_KERNEL);
+ if (!syndrome_data)
+ return false;
+
+ for (int i = 0; i < max_nr_components; i++) {
+ sprintf(name, "component_id%d", i);
+ debugfs_create_file(name, 0600, einj_debug_dir,
+ &syndrome_data[i].comp_id, &u128_fops);
+ sprintf(name, "component_syndrome%d", i);
+ debugfs_create_file(name, 0600, einj_debug_dir,
+ &syndrome_data[i].comp_synd, &u128_fops);
+ }
+
+ return true;
+}
+
+static int __init einj_probe(struct faux_device *fdev)
{
int rc;
acpi_status status;
struct apei_exec_context ctx;
- if (acpi_disabled) {
- pr_debug("ACPI disabled.\n");
- return -ENODEV;
- }
-
status = acpi_get_table(ACPI_SIG_EINJ, 0,
(struct acpi_table_header **)&einj_tab);
if (status == AE_NOT_FOUND) {
@@ -777,6 +1060,10 @@ static int __init einj_probe(struct platform_device *pdev)
goto err_put_table;
}
+ rc = einj_get_available_error_types(&available_error_type, &available_error_type_v2);
+ if (rc)
+ goto err_put_table;
+
rc = -ENOMEM;
einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir());
@@ -821,6 +1108,8 @@ static int __init einj_probe(struct platform_device *pdev)
&error_param4);
debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
einj_debug_dir, &notrigger);
+ if (available_error_type & ACPI65_EINJV2_SUPP)
+ einj_v2_enabled = setup_einjv2_component_files();
}
if (vendor_dev[0]) {
@@ -851,14 +1140,19 @@ err_put_table:
return rc;
}
-static void __exit einj_remove(struct platform_device *pdev)
+static void einj_remove(struct faux_device *fdev)
{
struct apei_exec_context ctx;
if (einj_param) {
- acpi_size size = (acpi5) ?
- sizeof(struct set_error_type_with_address) :
- sizeof(struct einj_parameter);
+ acpi_size size;
+
+ if (v66param_size)
+ size = v66param_size;
+ else if (acpi5)
+ size = v5param_size;
+ else
+ size = sizeof(struct einj_parameter);
acpi_os_unmap_iomem(einj_param, size);
if (vendor_errors.size)
@@ -869,47 +1163,34 @@ static void __exit einj_remove(struct platform_device *pdev)
apei_resources_release(&einj_resources);
apei_resources_fini(&einj_resources);
debugfs_remove_recursive(einj_debug_dir);
+ kfree(syndrome_data);
acpi_put_table((struct acpi_table_header *)einj_tab);
}
-static struct platform_device *einj_dev;
-/*
- * einj_remove() lives in .exit.text. For drivers registered via
- * platform_driver_probe() this is ok because they cannot get unbound at
- * runtime. So mark the driver struct with __refdata to prevent modpost
- * triggering a section mismatch warning.
- */
-static struct platform_driver einj_driver __refdata = {
- .remove = __exit_p(einj_remove),
- .driver = {
- .name = "acpi-einj",
- },
+static struct faux_device *einj_dev;
+static struct faux_device_ops einj_device_ops = {
+ .probe = einj_probe,
+ .remove = einj_remove,
};
static int __init einj_init(void)
{
- struct platform_device_info einj_dev_info = {
- .name = "acpi-einj",
- .id = -1,
- };
- int rc;
+ if (acpi_disabled) {
+ pr_debug("ACPI disabled.\n");
+ return -ENODEV;
+ }
- einj_dev = platform_device_register_full(&einj_dev_info);
- if (IS_ERR(einj_dev))
- return PTR_ERR(einj_dev);
+ einj_dev = faux_device_create("acpi-einj", NULL, &einj_device_ops);
- rc = platform_driver_probe(&einj_driver, einj_probe);
- einj_initialized = rc == 0;
+ if (einj_dev)
+ einj_initialized = true;
return 0;
}
static void __exit einj_exit(void)
{
- if (einj_initialized)
- platform_driver_unregister(&einj_driver);
-
- platform_device_unregister(einj_dev);
+ faux_device_destroy(einj_dev);
}
module_init(einj_init);
diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c
index 78da9ae543a2..e70a416ec925 100644
--- a/drivers/acpi/apei/einj-cxl.c
+++ b/drivers/acpi/apei/einj-cxl.c
@@ -30,7 +30,7 @@ int einj_cxl_available_error_type_show(struct seq_file *m, void *v)
int cxl_err, rc;
u32 available_error_type = 0;
- rc = einj_get_available_error_type(&available_error_type);
+ rc = einj_get_available_error_type(&available_error_type, ACPI_EINJ_GET_ERROR_TYPE);
if (rc)
return rc;
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index 246076341e8c..ff0e8bf8e97a 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -60,9 +60,8 @@ static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
switch (cmd) {
case APEI_ERST_CLEAR_RECORD:
- rc = copy_from_user(&record_id, (void __user *)arg,
- sizeof(record_id));
- if (rc)
+ if (copy_from_user(&record_id, (void __user *)arg,
+ sizeof(record_id)))
return -EFAULT;
return erst_clear(record_id);
case APEI_ERST_GET_RECORD_COUNT:
@@ -175,8 +174,7 @@ static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf,
erst_dbg_buf = p;
erst_dbg_buf_len = usize;
}
- rc = copy_from_user(erst_dbg_buf, ubuf, usize);
- if (rc) {
+ if (copy_from_user(erst_dbg_buf, ubuf, usize)) {
rc = -EFAULT;
goto out;
}
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 289e365f84b2..0dc767392a6c 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -22,6 +22,7 @@
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
+#include <linux/bitfield.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
@@ -43,6 +44,7 @@
#include <linux/uuid.h>
#include <linux/ras.h>
#include <linux/task_work.h>
+#include <linux/vmcore_info.h>
#include <acpi/actbl1.h>
#include <acpi/ghes.h>
@@ -464,39 +466,58 @@ static void ghes_clear_estatus(struct ghes *ghes,
ghes_ack_error(ghes->generic_v2);
}
-/*
- * Called as task_work before returning to user-space.
- * Ensure any queued work has been done before we return to the context that
- * triggered the notification.
+/**
+ * struct ghes_task_work - for synchronous RAS event
+ *
+ * @twork: callback_head for task work
+ * @pfn: page frame number of corrupted page
+ * @flags: work control flags
+ *
+ * Structure to pass task work to be handled before
+ * returning to user-space via task_work_add().
*/
-static void ghes_kick_task_work(struct callback_head *head)
+struct ghes_task_work {
+ struct callback_head twork;
+ u64 pfn;
+ int flags;
+};
+
+static void memory_failure_cb(struct callback_head *twork)
{
- struct acpi_hest_generic_status *estatus;
- struct ghes_estatus_node *estatus_node;
- u32 node_len;
+ struct ghes_task_work *twcb = container_of(twork, struct ghes_task_work, twork);
+ int ret;
- estatus_node = container_of(head, struct ghes_estatus_node, task_work);
- if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
- memory_failure_queue_kick(estatus_node->task_work_cpu);
+ ret = memory_failure(twcb->pfn, twcb->flags);
+ gen_pool_free(ghes_estatus_pool, (unsigned long)twcb, sizeof(*twcb));
- estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
- node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
- gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
+ if (!ret || ret == -EHWPOISON || ret == -EOPNOTSUPP)
+ return;
+
+ pr_err("%#llx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
+ twcb->pfn, current->comm, task_pid_nr(current));
+ force_sig(SIGBUS);
}
static bool ghes_do_memory_failure(u64 physical_addr, int flags)
{
+ struct ghes_task_work *twcb;
unsigned long pfn;
if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
return false;
pfn = PHYS_PFN(physical_addr);
- if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) {
- pr_warn_ratelimited(FW_WARN GHES_PFX
- "Invalid address in generic error data: %#llx\n",
- physical_addr);
- return false;
+
+ if (flags == MF_ACTION_REQUIRED && current->mm) {
+ twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb));
+ if (!twcb)
+ return false;
+
+ twcb->pfn = pfn;
+ twcb->flags = flags;
+ init_task_work(&twcb->twork, memory_failure_cb);
+ task_work_add(current, &twcb->twork, TWA_RESUME);
+ return true;
}
memory_failure_queue(pfn, flags);
@@ -527,26 +548,25 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
}
static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
- int sev, bool sync)
+ int sev, bool sync)
{
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
int flags = sync ? MF_ACTION_REQUIRED : 0;
+ char error_type[120];
bool queued = false;
int sec_sev, i;
char *p;
- log_arm_hw_error(err);
-
sec_sev = ghes_severity(gdata->error_severity);
+ log_arm_hw_error(err, sec_sev);
if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
return false;
p = (char *)(err + 1);
for (i = 0; i < err->err_info_num; i++) {
struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
- bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
+ bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
- const char *error_type = "unknown error";
/*
* The field (err_info->error_info & BIT(26)) is fixed to set to
@@ -560,12 +580,15 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
continue;
}
- if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
- error_type = cper_proc_error_type_strs[err_info->type];
+ cper_bits_to_str(error_type, sizeof(error_type),
+ FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
+ cper_proc_error_type_strs,
+ ARRAY_SIZE(cper_proc_error_type_strs));
pr_warn_ratelimited(FW_WARN GHES_PFX
- "Unhandled processor error type: %s\n",
- error_type);
+ "Unhandled processor error type 0x%02x: %s%s\n",
+ err_info->type, error_type,
+ (err_info->type & ~CPER_ARM_ERR_TYPE_MASK) ? " with reserved bit(s)" : "");
p += err_info->length;
}
@@ -842,7 +865,41 @@ int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
-static bool ghes_do_proc(struct ghes *ghes,
+static void ghes_log_hwerr(int sev, guid_t *sec_type)
+{
+ if (sev != CPER_SEV_RECOVERABLE)
+ return;
+
+ if (guid_equal(sec_type, &CPER_SEC_PROC_ARM) ||
+ guid_equal(sec_type, &CPER_SEC_PROC_GENERIC) ||
+ guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
+ hwerr_log_error_type(HWERR_RECOV_CPU);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
+ hwerr_log_error_type(HWERR_RECOV_CXL);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_PCIE) ||
+ guid_equal(sec_type, &CPER_SEC_PCI_X_BUS)) {
+ hwerr_log_error_type(HWERR_RECOV_PCI);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+ hwerr_log_error_type(HWERR_RECOV_MEMORY);
+ return;
+ }
+
+ hwerr_log_error_type(HWERR_RECOV_OTHERS);
+}
+
+static void ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
int sev, sec_sev;
@@ -863,6 +920,7 @@ static bool ghes_do_proc(struct ghes *ghes,
if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
fru_text = gdata->fru_text;
+ ghes_log_hwerr(sev, sec_type);
if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
@@ -870,11 +928,9 @@ static bool ghes_do_proc(struct ghes *ghes,
arch_apei_report_mem_error(sev, mem_err);
queued = ghes_handle_memory_failure(gdata, sev, sync);
- }
- else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
+ } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
ghes_handle_aer(gdata);
- }
- else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
+ } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
@@ -902,7 +958,16 @@ static bool ghes_do_proc(struct ghes *ghes,
}
}
- return queued;
+ /*
+ * If no memory failure work is queued for abnormal synchronous
+ * errors, do a force kill.
+ */
+ if (sync && !queued) {
+ dev_err(ghes->dev,
+ HW_ERR GHES_PFX "%s:%d: synchronous unrecoverable error (SIGBUS)\n",
+ current->comm, task_pid_nr(current));
+ force_sig(SIGBUS);
+ }
}
static void __ghes_print_estatus(const char *pfx,
@@ -1088,6 +1153,8 @@ static void __ghes_panic(struct ghes *ghes,
__ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
+
ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
if (!panic_timeout)
@@ -1138,7 +1205,7 @@ static void ghes_add_timer(struct ghes *ghes)
static void ghes_poll_func(struct timer_list *t)
{
- struct ghes *ghes = from_timer(ghes, t, timer);
+ struct ghes *ghes = timer_container_of(ghes, t, timer);
unsigned long flags;
spin_lock_irqsave(&ghes_notify_lock_irq, flags);
@@ -1171,12 +1238,10 @@ static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
int ret = NOTIFY_DONE;
spin_lock_irqsave(&ghes_notify_lock_irq, flags);
- rcu_read_lock();
list_for_each_entry_rcu(ghes, &ghes_hed, list) {
if (!ghes_proc(ghes))
ret = NOTIFY_OK;
}
- rcu_read_unlock();
spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
return ret;
@@ -1206,9 +1271,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
struct ghes_estatus_node *estatus_node;
struct acpi_hest_generic *generic;
struct acpi_hest_generic_status *estatus;
- bool task_work_pending;
u32 len, node_len;
- int ret;
llnode = llist_del_all(&ghes_estatus_llist);
/*
@@ -1223,25 +1286,16 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
len = cper_estatus_len(estatus);
node_len = GHES_ESTATUS_NODE_LEN(len);
- task_work_pending = ghes_do_proc(estatus_node->ghes, estatus);
+
+ ghes_do_proc(estatus_node->ghes, estatus);
+
if (!ghes_estatus_cached(estatus)) {
generic = estatus_node->generic;
if (ghes_print_estatus(NULL, generic, estatus))
ghes_estatus_cache_add(generic, estatus);
}
-
- if (task_work_pending && current->mm) {
- estatus_node->task_work.func = ghes_kick_task_work;
- estatus_node->task_work_cpu = smp_processor_id();
- ret = task_work_add(current, &estatus_node->task_work,
- TWA_RESUME);
- if (ret)
- estatus_node->task_work.func = NULL;
- }
-
- if (!estatus_node->task_work.func)
- gen_pool_free(ghes_estatus_pool,
- (unsigned long)estatus_node, node_len);
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
+ node_len);
llnode = next;
}
@@ -1302,7 +1356,6 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
estatus_node->ghes = ghes;
estatus_node->generic = ghes->generic;
- estatus_node->task_work.func = NULL;
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
@@ -1715,7 +1768,7 @@ void __init acpi_ghes_init(void)
{
int rc;
- sdei_init();
+ acpi_sdei_init();
if (acpi_disabled)
return;