summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h57
1 files changed, 57 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index ce4179db2a6d..ec203f9e5ffa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -54,6 +54,41 @@
/* Page retirement tag */
#define UMC_ECC_NEW_DETECTED_TAG 0x1
+/*
+ * a flag to indicate v2 of channel index stored in eeprom
+ *
+ * v1 (legacy way): store channel index within a umc instance in eeprom
+ * range in UMC v12: 0 ~ 7
+ * v2: store global channel index in eeprom
+ * range in UMC v12: 0 ~ 127
+ *
+ * NOTE: it's better to store it in eeprom_table_record.mem_channel,
+ * but there is only 8 bits in mem_channel, and the channel number may
+ * increase in the future, we decide to save it in
+ * eeprom_table_record.retired_page. retired_page is useless in v2,
+ * we depend on eeprom_table_record.address instead of retired_page in v2.
+ * Only 48 bits are saved on eeprom, use bit 47 here.
+ */
+#define UMC_CHANNEL_IDX_V2 BIT_ULL(47)
+
+/*
+ * save nps value to eeprom_table_record.retired_page[47:40],
+ * the channel index flag above will be retired.
+ */
+#define UMC_NPS_SHIFT 40
+#define UMC_NPS_MASK 0xffULL
+
+/* three column bits and one row bit in MCA address flip
+ * in bad page retirement
+ */
+#define RETIRE_FLIP_BITS_NUM 4
+
+struct amdgpu_umc_flip_bits {
+ uint32_t flip_bits_in_pa[RETIRE_FLIP_BITS_NUM];
+ uint32_t flip_row_bit;
+ uint32_t r13_in_pa;
+ uint32_t bit_num;
+};
typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
uint32_t umc_inst, uint32_t ch_inst, void *data);
@@ -70,6 +105,14 @@ struct amdgpu_umc_ras {
enum amdgpu_mca_error_type type, void *ras_error_status);
int (*update_ecc_status)(struct amdgpu_device *adev,
uint64_t status, uint64_t ipid, uint64_t addr);
+ int (*convert_ras_err_addr)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr);
+ uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page);
+ void (*get_retire_flip_bits)(struct amdgpu_device *adev);
};
struct amdgpu_umc_funcs {
@@ -100,6 +143,10 @@ struct amdgpu_umc {
/* active mask for umc node instance */
unsigned long active_mask;
+
+ struct amdgpu_umc_flip_bits flip_bits;
+
+ unsigned long err_addr_cnt;
};
int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
@@ -134,4 +181,14 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
void *ras_error_status);
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr);
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len);
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr);
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps);
#endif