CZ EDC param and support

Mon May 1 14:40:23 UTC 2017

> I’ve gotten a comment that inline patches are preferred.
>
Well, they are. But you should send them with "git send-email" and not 
squashed together all in one mail :)

Otherwise I can't see how we should be able to apply them.

Additional to that at least I'm perfectly fine with attached patches as 
well.

Christian.

Am 28.04.2017 um 16:18 schrieb Panariti, David:
>
> Actually, the attachment was an oversight.
>
> It’s easier for me to attach, open the attachment and then delete the 
> attachment.
>
> I got only 2/3 this time.
>
> I’ve gotten a comment that inline patches are preferred.
>
> Sorry for the inconvenience.
>
> davep
>
> *From:*Koenig, Christian
> *Sent:* Friday, April 28, 2017 4:06 AM
> *To:* Panariti, David <David.Panariti at amd.com>; gpudriverdevsupport 
> <gpudriverdevsupport at amd.com>; amd-gfx at lists.freedesktop.org
> *Subject:* Re: CZ EDC param and support
>
> You somehow messed up the attachment.
>
> Instead of individual files everything is squashed together as 
> all-edc.patch.
>
> Please fix that otherwise proper review won't be possible.
>
> Christian.
>
> Am 28.04.2017 um 00:13 schrieb Panariti, David:
>
>     The changes in the workarounds function use DRM_INFO rather than
>     DRM_DEBUG because CZs with EDC are often used in embedded
>     environments and any info can be useful especially in the case of
>     an intermittent problem.
>
>     From e1ce383592c275b58ad95bd80b5479af8c1f9dae Mon Sep 17 00:00:00 2001
>
>     From: David Panariti <David.Panariti at amd.com>
>     <mailto:David.Panariti at amd.com>
>
>     Date: Fri, 14 Apr 2017 13:41:52 -0400
>
>     Subject: [PATCH 1/3] drm/amdgpu: Moved gfx_v8_0_select_se_sh() in
>     lieu of
>
>     re-redundant prototype.
>
>     Will be needed for the rest of the EDC workarounds patch.
>
>     Change-Id: Ie586ab38a69e98a91c6cb5747e285ce8bfdd1c86
>
>     Signed-off-by: David Panariti <David.Panariti at amd.com>
>     <mailto:David.Panariti at amd.com>
>
>     ---
>
>     drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 46
>     +++++++++++++++++------------------
>
>     1 file changed, 23 insertions(+), 23 deletions(-)
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     index 2ff5f19..27b57cb 100644
>
>     --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     @@ -1500,6 +1500,29 @@ static int gfx_v8_0_kiq_init(struct
>     amdgpu_device *adev)
>
>                    return 0;
>
>     }
>
>     +static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
>
>     +   u32 se_num, u32 sh_num, u32 instance)
>
>     +{
>
>     +             u32 data;
>
>     +
>
>     +             if (instance == 0xffffffff)
>
>     +                             data = REG_SET_FIELD(0,
>     GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
>
>     +             else
>
>     +                             data = REG_SET_FIELD(0,
>     GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
>
>     +
>
>     +             if (se_num == 0xffffffff)
>
>     +                             data = REG_SET_FIELD(data,
>     GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
>
>     +             else
>
>     +                             data = REG_SET_FIELD(data,
>     GRBM_GFX_INDEX, SE_INDEX, se_num);
>
>     +
>
>     +             if (sh_num == 0xffffffff)
>
>     +                             data = REG_SET_FIELD(data,
>     GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
>
>     +             else
>
>     +                             data = REG_SET_FIELD(data,
>     GRBM_GFX_INDEX, SH_INDEX, sh_num);
>
>     +
>
>     +             WREG32(mmGRBM_GFX_INDEX, data);
>
>     +}
>
>     +
>
>     static const u32 vgpr_init_compute_shader[] =
>
>     {
>
>                    0x7e000209, 0x7e020208,
>
>     @@ -3556,29 +3579,6 @@ static void
>     gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
>
>                    }
>
>     }
>
>     -static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
>
>     -   u32 se_num, u32 sh_num, u32 instance)
>
>     -{
>
>     -              u32 data;
>
>     -
>
>     -              if (instance == 0xffffffff)
>
>     -                              data = REG_SET_FIELD(0,
>     GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
>
>     -              else
>
>     -                              data = REG_SET_FIELD(0,
>     GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
>
>     -
>
>     -              if (se_num == 0xffffffff)
>
>     -                              data = REG_SET_FIELD(data,
>     GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
>
>     -              else
>
>     -                              data = REG_SET_FIELD(data,
>     GRBM_GFX_INDEX, SE_INDEX, se_num);
>
>     -
>
>     -              if (sh_num == 0xffffffff)
>
>     -                              data = REG_SET_FIELD(data,
>     GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
>
>     -              else
>
>     -                              data = REG_SET_FIELD(data,
>     GRBM_GFX_INDEX, SH_INDEX, sh_num);
>
>     -
>
>     -              WREG32(mmGRBM_GFX_INDEX, data);
>
>     -}
>
>     -
>
>     static u32 gfx_v8_0_create_bitmask(u32 bit_width)
>
>     {
>
>                    return (u32)((1ULL << bit_width) - 1);
>
>     -- 
>
>     2.7.4
>
>     From 38fac8cab73dbc07e0ee7599b52106bc09dd32ea Mon Sep 17 00:00:00 2001
>
>     From: David Panariti <David.Panariti at amd.com>
>     <mailto:David.Panariti at amd.com>
>
>     Date: Mon, 24 Apr 2017 11:05:45 -0400
>
>     Subject: [PATCH 2/3] drm/amdgpu: Complete Carrizo EDC (Error
>     Detection and
>
>     Correction) workarounds.
>
>     The workarounds are unconditionally performed on CZs with EDC enabled.
>
>     EDC detects uncorrected ECC errors and uses data poisoning to prevent
>
>     corrupted compute results from being used (read).
>
>     EDC enabled CZs are often used in embedded environments.
>
>     Change-Id: I84c261785329beeb797f11efbe0ec35790f2996c
>
>     Signed-off-by: David Panariti <David.Panariti at amd.com>
>     <mailto:David.Panariti at amd.com>
>
>     ---
>
>     drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 148
>     ++++++++++++++++++++++++----------
>
>     1 file changed, 106 insertions(+), 42 deletions(-)
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     index 27b57cb..2f5bf5f 100644
>
>     --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     @@ -1645,35 +1645,92 @@ static const u32 sgpr2_init_regs[] =
>
>                    mmCOMPUTE_USER_DATA_9, 0xedcedc09,
>
>     };
>
>     -static const u32 sec_ded_counter_registers[] =
>
>     -{
>
>     -              mmCPC_EDC_ATC_CNT,
>
>     -              mmCPC_EDC_SCRATCH_CNT,
>
>     -              mmCPC_EDC_UCODE_CNT,
>
>     -              mmCPF_EDC_ATC_CNT,
>
>     -              mmCPF_EDC_ROQ_CNT,
>
>     -              mmCPF_EDC_TAG_CNT,
>
>     -              mmCPG_EDC_ATC_CNT,
>
>     -              mmCPG_EDC_DMA_CNT,
>
>     -              mmCPG_EDC_TAG_CNT,
>
>     -              mmDC_EDC_CSINVOC_CNT,
>
>     -              mmDC_EDC_RESTORE_CNT,
>
>     -              mmDC_EDC_STATE_CNT,
>
>     -              mmGDS_EDC_CNT,
>
>     -              mmGDS_EDC_GRBM_CNT,
>
>     -              mmGDS_EDC_OA_DED,
>
>     -              mmSPI_EDC_CNT,
>
>     - mmSQC_ATC_EDC_GATCL1_CNT,
>
>     -              mmSQC_EDC_CNT,
>
>     -              mmSQ_EDC_DED_CNT,
>
>     -              mmSQ_EDC_INFO,
>
>     -              mmSQ_EDC_SEC_CNT,
>
>     -              mmTCC_EDC_CNT,
>
>     - mmTCP_ATC_EDC_GATCL1_CNT,
>
>     -              mmTCP_EDC_CNT,
>
>     -              mmTD_EDC_CNT
>
>     +struct reg32_counter_name_map {
>
>     +             uint32_t rnmap_addr;   /* Counter register address */
>
>     +             const char *rnmap_name;           /* Name of the
>     counter */
>
>     +             size_t rnmap_num_instances;  /* Number of block
>     instances */
>
>     };
>
>     +#define DEF_mmREG32_NAME_MAP_ELEMENT(reg, num_instances)
>     {             \
>
>     +             .rnmap_addr = mm##reg, \
>
>     +             .rnmap_name = #reg, \
>
>     +             .rnmap_num_instances =
>     num_instances                                            \
>
>     +}
>
>     +
>
>     +/* See GRBM_GFX_INDEX, et. al. registers. */
>
>     +static const struct reg32_counter_name_map
>     sec_ded_counter_registers[] = {
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(SQC_EDC_CNT, 2),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(SQC_ATC_EDC_GATCL1_CNT, 2),
>
>     +
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(SQ_EDC_SEC_CNT, 8),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(SQ_EDC_DED_CNT, 8),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(TCP_EDC_CNT, 8),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(TCP_ATC_EDC_GATCL1_CNT, 8),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(TD_EDC_CNT, 8),
>
>     +
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(TCC_EDC_CNT, 4),
>
>     +
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPC_EDC_ATC_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPC_EDC_SCRATCH_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPC_EDC_UCODE_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPF_EDC_ATC_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPF_EDC_ROQ_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPF_EDC_TAG_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPG_EDC_ATC_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPG_EDC_DMA_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(CPG_EDC_TAG_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(DC_EDC_CSINVOC_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(DC_EDC_STATE_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(DC_EDC_RESTORE_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(GDS_EDC_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(GDS_EDC_GRBM_CNT, 1),
>
>     + DEF_mmREG32_NAME_MAP_ELEMENT(SPI_EDC_CNT, 1),
>
>     +};
>
>     +
>
>     +static int gfx_v8_0_edc_clear_counters(struct amdgpu_device *adev)
>
>     +{
>
>     +             int ci, se, sh, i;
>
>     +             uint32_t count;
>
>     +             int r = 0;
>
>     +
>
>     + mutex_lock(&adev->grbm_idx_mutex);
>
>     +
>
>     +             for (ci = 0; ci <
>     ARRAY_SIZE(sec_ded_counter_registers); ++ci) {
>
>     +                             const struct reg32_counter_name_map
>     *cp =
>
>     + sec_ded_counter_registers + ci;
>
>     +                             const char *name = cp->rnmap_name;
>
>     +
>
>     +                             for (se = 0; se <
>     adev->gfx.config.max_shader_engines; ++se) {
>
>     + for (sh = 0; sh < adev->gfx.config.max_sh_per_se; ++sh) {
>
>     + for (i = 0; i < cp->rnmap_num_instances; ++i) {
>
>     + gfx_v8_0_select_se_sh(adev, se, sh, i);
>
>     + count = RREG32(cp->rnmap_addr);
>
>     + count = RREG32(cp->rnmap_addr);
>
>     + if (count != 0) {
>
>     + /*
>
>     + * Workaround failed.
>
>     + * If people are interested
>
>     + * in EDC at all, they will
>
>     + * want to know which
>
>     + * counters had problems.
>
>     + */
>
>     + DRM_WARN("EDC counter %s is 0x%08x, but should be 0\n.",
>
>     + name, count);
>
>     + r = -EINVAL;
>
>     + goto ret;
>
>     + }
>
>     + }
>
>     + }
>
>     +                             }
>
>     +             }
>
>     +
>
>     +ret:
>
>     + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
>
>     + mutex_unlock(&adev->grbm_idx_mutex);
>
>     +
>
>     +             return r;
>
>     +}
>
>     +
>
>     static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>
>     {
>
>                    struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
>
>     @@ -1681,18 +1738,36 @@ static int
>     gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>
>                    struct fence *f = NULL;
>
>                    int r, i;
>
>                    u32 tmp;
>
>     +             u32 dis_bit;
>
>                    unsigned total_size, vgpr_offset, sgpr_offset;
>
>                    u64 gpu_addr;
>
>     -              /* only supported on CZ */
>
>     -              if (adev->asic_type != CHIP_CARRIZO)
>
>     +             if (adev->asic_type != CHIP_CARRIZO) {
>
>     +                             /* EDC is only supported on CZ */
>
>     +                             return 0;
>
>     +             }
>
>     +
>
>     +        DRM_INFO("Detected Carrizo.\n");
>
>     +
>
>     +             tmp = RREG32(mmCC_GC_EDC_CONFIG);
>
>     +             dis_bit = REG_GET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC);
>
>     +             if (dis_bit) {
>
>     +                             /* On Carrizo, EDC may be disabled
>     by a fuse. */
>
>     + DRM_INFO("EDC hardware is disabled, GC_EDC_CONFIG: 0x%08x.\n",
>
>     + tmp);
>
>                                    return 0;
>
>     +             }
>
>                     /* bail if the compute ring is not ready */
>
>                    if (!ring->ready)
>
>                                    return 0;
>
>     -              tmp = RREG32(mmGB_EDC_MODE);
>
>     +             DRM_INFO("Applying EDC workarounds.\n");
>
>     +
>
>     +             /*
>
>     +             * Interested parties can enable EDC using debugfs
>     register reads and
>
>     +             * writes.
>
>     +             */
>
>                    WREG32(mmGB_EDC_MODE, 0);
>
>                     total_size =
>
>     @@ -1817,18 +1892,7 @@ static int
>     gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>
>                                    goto fail;
>
>                    }
>
>     -              tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
>
>     -              tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
>
>     -              WREG32(mmGB_EDC_MODE, tmp);
>
>     -
>
>     -              tmp = RREG32(mmCC_GC_EDC_CONFIG);
>
>     -              tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC,
>     0) | 1;
>
>     - WREG32(mmCC_GC_EDC_CONFIG, tmp);
>
>     -
>
>     -
>
>     -              /* read back registers to clear the counters */
>
>     -              for (i = 0; i <
>     ARRAY_SIZE(sec_ded_counter_registers); i++)
>
>     - RREG32(sec_ded_counter_registers[i]);
>
>     + gfx_v8_0_edc_clear_counters(adev);
>
>      fail:
>
>                    amdgpu_ib_free(adev, &ib, NULL);
>
>     -- 
>
>     2.7.4
>
>     From ec4803205582c1011f5ced1ead70ee244268b4b8 Mon Sep 17 00:00:00 2001
>
>     From: David Panariti <David.Panariti at amd.com>
>     <mailto:David.Panariti at amd.com>
>
>     Date: Wed, 26 Apr 2017 10:13:06 -0400
>
>     Subject: [PATCH 3/3] drm/amdgpu: Add kernel parameter to control
>     use of
>
>     ECC/EDC.
>
>     Allow various kinds of memory integrity methods (e.g. ECC/EDC) to
>     be enabled
>
>     or disabled.  By default, all features are disabled.
>
>     EDC is Error Detection and Correction. It can detect ECC errors
>     and do 0 or
>
>     more of: count SEC (single error corrected) and DED (double error
>     detected,
>
>     i.e. uncorrected ECC error), halt the affected block, interrupt
>     the CPU.
>
>     Currently, only counting errors is supported.
>
>     Signed-off-by: David Panariti <David.Panariti at amd.com>
>     <mailto:David.Panariti at amd.com>
>
>     ---
>
>     drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
>
>     drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  4 ++++
>
>     drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 34
>     +++++++++++++++++++++++++++-----
>
>     drivers/gpu/drm/amd/include/amd_shared.h | 14 +++++++++++++
>
>     4 files changed, 48 insertions(+), 5 deletions(-)
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>
>     index 4a16e3c..0322392 100644
>
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>
>     @@ -111,6 +111,7 @@ extern int amdgpu_prim_buf_per_se;
>
>     extern int amdgpu_pos_buf_per_se;
>
>     extern int amdgpu_cntl_sb_buf_per_se;
>
>     extern int amdgpu_param_buf_per_se;
>
>     +extern unsigned amdgpu_ecc_flags;
>
>      #define AMDGPU_DEFAULT_GTT_SIZE_MB                      3072ULL
>     /* 3GB by default */
>
>     #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS                   3000
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>
>     index ead00d7..00e16ac 100644
>
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>
>     @@ -110,6 +110,7 @@ int amdgpu_prim_buf_per_se = 0;
>
>     int amdgpu_pos_buf_per_se = 0;
>
>     int amdgpu_cntl_sb_buf_per_se = 0;
>
>     int amdgpu_param_buf_per_se = 0;
>
>     +unsigned amdgpu_ecc_flags = 0;
>
>      MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in
>     megabytes");
>
>     module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
>
>     @@ -235,6 +236,9 @@ module_param_named(cntl_sb_buf_per_se,
>     amdgpu_cntl_sb_buf_per_se, int, 0444);
>
>     MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater
>     Cache per Shader Engine (default depending on gfx)");
>
>     module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int,
>     0444);
>
>     +MODULE_PARM_DESC(ecc_flags, "ECC/EDC enable flags (0 = disable
>     ECC/EDC (default))");
>
>     +module_param_named(ecc_flags, amdgpu_ecc_flags, uint, 0444);
>
>     +
>
>      static const struct pci_device_id pciidlist[] = {
>
>     #ifdef  CONFIG_DRM_AMDGPU_SI
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     index 2f5bf5f..05cab7e 100644
>
>     --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>
>     @@ -1708,7 +1708,7 @@ static int
>     gfx_v8_0_edc_clear_counters(struct amdgpu_device *adev)
>
>     count = RREG32(cp->rnmap_addr);
>
>     if (count != 0) {
>
>     /*
>
>     - * Workaround failed.
>
>     + * EDC workaround failed.
>
>      * If people are interested
>
>      * in EDC at all, they will
>
>      * want to know which
>
>     @@ -1747,14 +1747,24 @@ static int
>     gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>
>                                    return 0;
>
>                    }
>
>     -        DRM_INFO("Detected Carrizo.\n");
>
>     +             DRM_INFO("Detected Carrizo.\n");
>
>                     tmp = RREG32(mmCC_GC_EDC_CONFIG);
>
>                    dis_bit = REG_GET_FIELD(tmp, CC_GC_EDC_CONFIG,
>     DIS_EDC);
>
>                    if (dis_bit) {
>
>     -                              /* On Carrizo, EDC may be disabled
>     by a fuse. */
>
>     - DRM_INFO("EDC hardware is disabled, GC_EDC_CONFIG: 0x%08x.\n",
>
>     - tmp);
>
>     +                             /* On Carrizo, EDC may be disabled
>     permanently by a fuse. */
>
>     + DRM_INFO("Carrizo EDC hardware is disabled, GC_EDC_CONFIG:
>     0x%08x.\n",
>
>     + tmp);
>
>     +                             return 0;
>
>     +             }
>
>     +
>
>     +             /*
>
>     +             * Check if EDC has been requested by a kernel parameter.
>
>     +             * For Carrizo, EDC is the best/safest mode WRT error
>     handling.
>
>     +             */
>
>     +             if (!(amdgpu_ecc_flags
>
>     +                   & (AMD_ECC_SUPPORT_BEST | AMD_ECC_SUPPORT_EDC))) {
>
>     + DRM_INFO("EDC support has not been requested.\n");
>
>                                    return 0;
>
>                    }
>
>     @@ -1892,6 +1902,20 @@ static int
>     gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>
>                                    goto fail;
>
>                    }
>
>     +             /* 00 - GB_EDC_DED_MODE_LOG: Count DED errors but do
>     not halt */
>
>     +             tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 0);
>
>     +             /* Do not propagate the errors to the next block. */
>
>     +             tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 0);
>
>     +             WREG32(mmGB_EDC_MODE, tmp);
>
>     +
>
>     +             tmp = RREG32(mmCC_GC_EDC_CONFIG);
>
>     +
>
>     +             /*
>
>     +             * Clear EDC_DISABLE bit so the counters are available.
>
>     +             */
>
>     +             tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0);
>
>     + WREG32(mmCC_GC_EDC_CONFIG, tmp);
>
>     +
>
>     gfx_v8_0_edc_clear_counters(adev);
>
>      fail:
>
>     diff --git a/drivers/gpu/drm/amd/include/amd_shared.h
>     b/drivers/gpu/drm/amd/include/amd_shared.h
>
>     index 2ccf44e..c4fd013 100644
>
>     --- a/drivers/gpu/drm/amd/include/amd_shared.h
>
>     +++ b/drivers/gpu/drm/amd/include/amd_shared.h
>
>     @@ -179,6 +179,20 @@ struct amd_pp_profile {
>
>     #define AMD_PG_SUPPORT_GFX_QUICK_MG (1 << 11)
>
>     #define AMD_PG_SUPPORT_GFX_PIPELINE                     (1 << 12)
>
>     +/*
>
>     + * ECC flags
>
>     + * Allows the user to choose what kind of error
>     detection/correction is used.
>
>     + * Currently, EDC is supported on Carrizo.
>
>     + *
>
>     + * The AMD_ECC_SUPPORT_BEST bit is used to allow a user to have
>     the driver
>
>     + * set what it thinks is best/safest mode.  This may not be the
>     same as the
>
>     + * default, depending on the GPU and the application.
>
>     + * Using a single bit makes it easy to request the best support
>     without
>
>     + * needing to know all currently supported modes.
>
>     + */
>
>     +#define AMD_ECC_SUPPORT_BEST (1 << 0)
>
>     +#define AMD_ECC_SUPPORT_EDC (1 << 1)
>
>     +
>
>     enum amd_pm_state_type {
>
>                    /* not used for dpm */
>
>     POWER_STATE_TYPE_DEFAULT,
>
>     -- 
>
>     2.7.4
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20170501/9a09c534/attachment-0001.html>