diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-04-15 08:26:33 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-04-15 08:26:33 -0700 |
commit | 1a1d569a75f3ab2923cb62daf356d102e4df2b86 (patch) | |
tree | 3690586ebf52fa229a01c6914a4db717cf19d042 /drivers/ras/amd/atl/umc.c | |
parent | 065d49851e1a345faf112f12f96272e37ccd58ad (diff) | |
parent | 58029c39cdc54ac4f4dc40b4a9c05eed9f9b808a (diff) |
Merge tag 'edac_urgent_for_v6.15_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/rasHEADmaster
Pull EDAC fixes from Borislav Petkov:
"Two fixes to the AMD translation library for the MI300 side of things:
- Use the row[13] bit when calculating the memory row to retire
- Mask the physical row address in order to avoid creating duplicate
error records"
* tag 'edac_urgent_for_v6.15_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
RAS/AMD/FMPM: Get masked address
RAS/AMD/ATL: Include row[13] bit in row retirement
Diffstat (limited to 'drivers/ras/amd/atl/umc.c')
-rw-r--r-- | drivers/ras/amd/atl/umc.c | 19 |
1 files changed, 17 insertions, 2 deletions
diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index dc8aa12f63c8..6e072b7667e9 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -229,7 +229,6 @@ int get_umc_info_mi300(void) * Additionally, the PC and Bank bits may be hashed. This must be accounted for before * reconstructing the normalized address. */ -#define MI300_UMC_MCA_COL GENMASK(5, 1) #define MI300_UMC_MCA_BANK GENMASK(9, 6) #define MI300_UMC_MCA_ROW GENMASK(24, 10) #define MI300_UMC_MCA_PC BIT(25) @@ -320,7 +319,7 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats. */ #define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL)) -static void retire_row_mi300(struct atl_err *a_err) +static void _retire_row_mi300(struct atl_err *a_err) { unsigned long addr; struct page *p; @@ -351,6 +350,22 @@ static void retire_row_mi300(struct atl_err *a_err) } } +/* + * In addition to the column bits, the row[13] bit should also be included when + * calculating addresses affected by a physical row. + * + * Instead of running through another loop over a single bit, just run through + * the column bits twice and flip the row[13] bit in-between. + * + * See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value. + */ +static void retire_row_mi300(struct atl_err *a_err) +{ + _retire_row_mi300(a_err); + a_err->addr ^= MI300_UMC_MCA_ROW13; + _retire_row_mi300(a_err); +} + void amd_retire_dram_row(struct atl_err *a_err) { if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) |