[PATCH 2/2] drm/amdgpu: Add parsing SQ_EDC_INFO to SQ IH.
Christian König
ckoenig.leichtzumerken at gmail.com
Wed Jun 20 06:37:20 UTC 2018
Am 19.06.2018 um 18:09 schrieb Andrey Grodzovsky:
> Access to SQ_EDC_INFO requires selecting register instance and
> hence mutex lock when accessing GRBM_GFX_INDEX for which a work
> is schedueled from IH. But SQ interrupt can be raised on many instances
> at once which means queuing work will usually succeed for the first one
> but fail for the reset since the work takes time to process. To avoid
> losing info about other interrupt instances call the parsing function
> directly from high IRQ when current work hasn't finished and avoid
> accessing SQ_EDC_INFO in that case.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 97 ++++++++++++++++++++++++++++++-----
> 2 files changed, 91 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e8c6cc1..a7b9ef5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -930,6 +930,11 @@ struct amdgpu_ngg {
> bool init;
> };
>
> +struct sq_work {
> + struct work_struct work;
> + unsigned ih_data;
> +};
> +
> struct amdgpu_gfx {
> struct mutex gpu_clock_mutex;
> struct amdgpu_gfx_config config;
> @@ -970,6 +975,8 @@ struct amdgpu_gfx {
> struct amdgpu_irq_src priv_inst_irq;
> struct amdgpu_irq_src cp_ecc_error_irq;
> struct amdgpu_irq_src sq_irq;
> + struct sq_work sq_work;
> +
> /* gfx status */
> uint32_t gfx_current_status;
> /* ce ram size*/
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 93904a7..0add7fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -704,6 +704,17 @@ static const u32 stoney_mgcg_cgcg_init[] =
> mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
> };
>
> +
> +static const char * const sq_edc_source_names[] = {
> + "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
> + "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
> + "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
> + "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
> + "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
> + "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
> + "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
> +};
> +
> static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
> static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
> static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
> @@ -2003,6 +2014,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
> return 0;
> }
>
> +static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
> +
> static int gfx_v8_0_sw_init(void *handle)
> {
> int i, j, k, r, ring_id;
> @@ -2066,6 +2079,8 @@ static int gfx_v8_0_sw_init(void *handle)
> return r;
> }
>
> + INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
> +
> adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
>
> gfx_v8_0_scratch_init(adev);
> @@ -6952,14 +6967,11 @@ static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
> return 0;
> }
>
> -static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
> - struct amdgpu_irq_src *source,
> - struct amdgpu_iv_entry *entry)
> +static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
> {
> - u8 enc, se_id;
> + u32 enc, se_id, sh_id, cu_id;
> char type[20];
> - unsigned ih_data = entry->src_data[0];
> -
> + int sq_edc_source = -1;
>
> enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
> se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
> @@ -6985,6 +6997,24 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
> case 1:
> case 2:
>
> + cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
> + sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
> +
> + /*
> + * This function can be called either directly from ISR
> + * or from BH in which case we can access SQ_EDC_INFO
> + * instance
> + */
> + if (in_task()) {
> + mutex_lock(&adev->grbm_idx_mutex);
> + gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
> +
> + sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
> +
> + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> + mutex_unlock(&adev->grbm_idx_mutex);
> + }
> +
> if (enc == 1)
> sprintf(type, "instruction intr");
> else
> @@ -6992,20 +7022,61 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
>
> DRM_INFO(
> "SQ %s detected: "
> - "se_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d\n"
> - "trap %s, sh_id %d. ",
> - type, se_id,
> - REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID),
> + "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
> + "trap %s, sq_ed_info.source %s.\n",
> + type, se_id, sh_id, cu_id,
> REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
> REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
> REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
> REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
> - REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID)
> - );
> + (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
> + );
> break;
> default:
> DRM_ERROR("SQ invalid encoding type\n.");
> - return -EINVAL;
> + }
> +}
> +
> +static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
> +{
> +
> + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
> + struct sq_work *sq_work = container_of(work, struct sq_work, work);
> +
> + smp_rmb();
> +
> + gfx_v8_0_parse_sq_irq(adev, READ_ONCE(sq_work->ih_data));
> +
> + WRITE_ONCE(sq_work->ih_data, 0);
> + /* Make the value change visible ASAP to IH */
> + smp_wmb();
> +}
> +
> +static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
> + struct amdgpu_irq_src *source,
> + struct amdgpu_iv_entry *entry)
> +{
> + unsigned ih_data = entry->src_data[0];
> +
> + /*
> + * Try to submit work so SQ_EDC_INFO can be accessed from
> + * BH. If previous work submission hasn't finished yet
> + * just print whatever info is possible directly from the ISR.
> + */
> + smp_rmb();
> + if (READ_ONCE(adev->gfx.sq_work.ih_data))
> + gfx_v8_0_parse_sq_irq(adev, ih_data);
> + else {
> + WRITE_ONCE(adev->gfx.sq_work.ih_data, ih_data);
> + /* Verify the new value visible in BH handler */
> + smp_wmb();
> + if (!schedule_work(&adev->gfx.sq_work.work)) {
> +
> + WRITE_ONCE(adev->gfx.sq_work.ih_data, 0);
> + gfx_v8_0_parse_sq_irq(adev, ih_data);
> + /* Verify 0 is visible to next HW IH */
> + smp_wmb();
> + }
At least of hand that looks like it is racy.
You should probably rather use work_pending() like this:
if (work_pending(&adev->gfx.sq_work.work)) {
gfx_v8_0_parse_sq_irq(adev, ih_data);
} else {
adev->gfx.sq_work.ih_data = ih_data;
schedule_work(&adev->gfx.sq_work.work);
}
Regards,
Christian.
> }
>
> return 0;
More information about the amd-gfx
mailing list