summary | refs | log | tree | commit | diff
path: root/drivers/ras/amd/fmpm.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2025-04-15 08:26:33 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2025-04-15 08:26:33 -0700
commit: 1a1d569a75f3ab2923cb62daf356d102e4df2b86 (patch)
tree: 3690586ebf52fa229a01c6914a4db717cf19d042 /drivers/ras/amd/fmpm.c
parent: 065d49851e1a345faf112f12f96272e37ccd58ad (diff)
parent: 58029c39cdc54ac4f4dc40b4a9c05eed9f9b808a (diff)
Merge tag 'edac_urgent_for_v6.15_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras (HEAD, master)

Pull EDAC fixes from Borislav Petkov:
 "Two fixes to the AMD translation library for the MI300 side of things:

   - Use the row[13] bit when calculating the memory row to retire

   - Mask the physical row address in order to avoid creating duplicate
     error records"

* tag 'edac_urgent_for_v6.15_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  RAS/AMD/FMPM: Get masked address
  RAS/AMD/ATL: Include row[13] bit in row retirement
Diffstat (limited to 'drivers/ras/amd/fmpm.c')
-rw-r--r-- drivers/ras/amd/fmpm.c 9
1 file changed, 8 insertions, 1 deletion
diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c
index 90de737fbc90..8877c6ff64c4 100644
--- a/drivers/ras/amd/fmpm.c
+++ b/drivers/ras/amd/fmpm.c
@@ -250,6 +250,13 @@ static bool rec_has_valid_entries(struct fru_rec *rec)
return true;
}
+/*
+ * Row retirement is done on MI300 systems, and some address bits are
+ * 'don't care' when matching addresses to unique physical rows: all
+ * column bits and the row[13] bit. MASK_ADDR() clears those bits.
+ */
+#define MASK_ADDR(addr) ((addr) & ~(MI300_UMC_MCA_ROW13 | MI300_UMC_MCA_COL))
+
static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new)
{
/*
@@ -258,7 +265,7 @@ static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_
*
* Also, order the checks from most->least likely to fail to shortcut the code.
*/
- if (old->addr != new->addr)
+ if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr))
return false;
if (old->hw_id != new->hw_id)