[PATCH 2/2] drm/amdgpu: Add parsing SQ_EDC_INFO to SQ IH.

Wed Jun 20 09:42:08 UTC 2018

> -----Original Message-----
> From: Christian König [mailto:ckoenig.leichtzumerken at gmail.com]
> Sent: Wednesday, June 20, 2018 2:37 AM
> To: Grodzovsky, Andrey <Andrey.Grodzovsky at amd.com>; amd-
> gfx at lists.freedesktop.org
> Cc: Panariti, David <David.Panariti at amd.com>; Haehnle, Nicolai
> <Nicolai.Haehnle at amd.com>
> Subject: Re: [PATCH 2/2] drm/amdgpu: Add parsing SQ_EDC_INFO to SQ IH.
> 
> Am 19.06.2018 um 18:09 schrieb Andrey Grodzovsky:
> > Access to SQ_EDC_INFO requires selecting register instance and hence
> > mutex lock when accessing GRBM_GFX_INDEX for which a work is
> > schedueled from IH. But SQ interrupt can be raised on many instances
> > at once which means queuing work will usually succeed for the first
> > one but fail for the reset since the work takes time to process. To
> > avoid losing info about other interrupt instances call the parsing
> > function directly from high IRQ when current work hasn't finished and
> > avoid accessing SQ_EDC_INFO in that case.
> >
> > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  7 +++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 97
> ++++++++++++++++++++++++++++++-----
> >   2 files changed, 91 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index e8c6cc1..a7b9ef5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -930,6 +930,11 @@ struct amdgpu_ngg {
> >   	bool			init;
> >   };
> >
> > +struct sq_work {
> > +	struct work_struct	work;
> > +	unsigned ih_data;
> > +};
> > +
> >   struct amdgpu_gfx {
> >   	struct mutex			gpu_clock_mutex;
> >   	struct amdgpu_gfx_config	config;
> > @@ -970,6 +975,8 @@ struct amdgpu_gfx {
> >   	struct amdgpu_irq_src		priv_inst_irq;
> >   	struct amdgpu_irq_src		cp_ecc_error_irq;
> >   	struct amdgpu_irq_src		sq_irq;
> > +	struct sq_work			sq_work;
> > +
> >   	/* gfx status */
> >   	uint32_t			gfx_current_status;
> >   	/* ce ram size*/
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > index 93904a7..0add7fc 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > @@ -704,6 +704,17 @@ static const u32 stoney_mgcg_cgcg_init[] =
> >   	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
> >   };
> >
> > +
> > +static const char * const sq_edc_source_names[] = {
> > +	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
> > +	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
> > +	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data
> return",
> > +	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
> > +	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
> > +	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
> > +	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA", };
> > +
> >   static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
> >   static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
> >   static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); @@
> > -2003,6 +2014,8 @@ static int gfx_v8_0_compute_ring_init(struct
> amdgpu_device *adev, int ring_id,
> >   	return 0;
> >   }
> >
> > +static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
> > +
> >   static int gfx_v8_0_sw_init(void *handle)
> >   {
> >   	int i, j, k, r, ring_id;
> > @@ -2066,6 +2079,8 @@ static int gfx_v8_0_sw_init(void *handle)
> >   		return r;
> >   	}
> >
> > +	INIT_WORK(&adev->gfx.sq_work.work,
> gfx_v8_0_sq_irq_work_func);
> > +
> >   	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
> >
> >   	gfx_v8_0_scratch_init(adev);
> > @@ -6952,14 +6967,11 @@ static int gfx_v8_0_cp_ecc_error_irq(struct
> amdgpu_device *adev,
> >   	return 0;
> >   }
> >
> > -static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
> > -			   struct amdgpu_irq_src *source,
> > -			   struct amdgpu_iv_entry *entry)
> > +static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev,
> > +unsigned ih_data)
> >   {
> > -	u8 enc, se_id;
> > +	u32 enc, se_id, sh_id, cu_id;
> >   	char type[20];
> > -	unsigned ih_data = entry->src_data[0];
> > -
> > +	int sq_edc_source = -1;
> >
> >   	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN,
> ENCODING);
> >   	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN,
> SE_ID); @@
> > -6985,6 +6997,24 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device
> *adev,
> >   		case 1:
> >   		case 2:
> >
> > +			cu_id = REG_GET_FIELD(ih_data,
> SQ_INTERRUPT_WORD_WAVE, CU_ID);
> > +			sh_id = REG_GET_FIELD(ih_data,
> SQ_INTERRUPT_WORD_WAVE, SH_ID);
> > +
> > +			/*
> > +			 * This function can be called either directly from ISR
> > +			 * or from BH in which case we can access
> SQ_EDC_INFO
> > +			 * instance
> > +			 */
> > +			if (in_task()) {
> > +				mutex_lock(&adev->grbm_idx_mutex);
> > +				gfx_v8_0_select_se_sh(adev, se_id, sh_id,
> cu_id);
> > +
> > +				sq_edc_source =
> REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO,
> > +SOURCE);
> > +
> > +				gfx_v8_0_select_se_sh(adev, 0xffffffff,
> 0xffffffff, 0xffffffff);
> > +				mutex_unlock(&adev->grbm_idx_mutex);
> > +			}
> > +
> >   			if (enc == 1)
> >   				sprintf(type, "instruction intr");
> >   			else
> > @@ -6992,20 +7022,61 @@ static int gfx_v8_0_sq_irq(struct
> > amdgpu_device *adev,
> >
> >   			DRM_INFO(
> >   				"SQ %s detected: "
> > -					"se_id %d, cu_id %d, simd_id %d,
> wave_id %d, vm_id %d\n"
> > -					"trap %s, sh_id %d. ",
> > -					type, se_id,
> > -					REG_GET_FIELD(ih_data,
> SQ_INTERRUPT_WORD_WAVE, CU_ID),
> > +					"se_id %d, sh_id %d, cu_id %d,
> simd_id %d, wave_id %d, vm_id %d "
> > +					"trap %s, sq_ed_info.source %s.\n",
> > +					type, se_id, sh_id, cu_id,
> >   					REG_GET_FIELD(ih_data,
> SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
> >   					REG_GET_FIELD(ih_data,
> SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
> >   					REG_GET_FIELD(ih_data,
> SQ_INTERRUPT_WORD_WAVE, VM_ID),
> >   					REG_GET_FIELD(ih_data,
> SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
> > -					REG_GET_FIELD(ih_data,
> SQ_INTERRUPT_WORD_WAVE, SH_ID)
> > -					);
> > +					(sq_edc_source != -1) ?
> sq_edc_source_names[sq_edc_source] : "unavailable"
> > +				);
> >   			break;
> >   		default:
> >   			DRM_ERROR("SQ invalid encoding type\n.");
> > -			return -EINVAL;
> > +	}
> > +}
> > +
> > +static void gfx_v8_0_sq_irq_work_func(struct work_struct *work) {
> > +
> > +	struct amdgpu_device *adev = container_of(work, struct
> amdgpu_device, gfx.sq_work.work);
> > +	struct sq_work *sq_work = container_of(work, struct sq_work,
> work);
> > +
> > +	smp_rmb();
> > +
> > +	gfx_v8_0_parse_sq_irq(adev, READ_ONCE(sq_work->ih_data));
> > +
> > +	WRITE_ONCE(sq_work->ih_data, 0);
> > +	/* Make the value change visible ASAP to IH */
> > +	smp_wmb();
> > +}
> > +
> > +static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
> > +			   struct amdgpu_irq_src *source,
> > +			   struct amdgpu_iv_entry *entry)
> > +{
> > +	unsigned ih_data = entry->src_data[0];
> > +
> > +	/*
> > +	 * Try to submit work so SQ_EDC_INFO can be accessed from
> > +	 * BH. If previous work submission hasn't finished yet
> > +	 * just print whatever info is possible directly from the ISR.
> > +	 */
> > +	smp_rmb();
> > +	if (READ_ONCE(adev->gfx.sq_work.ih_data))
> > +		gfx_v8_0_parse_sq_irq(adev, ih_data);
> > +	else {
> > +		WRITE_ONCE(adev->gfx.sq_work.ih_data, ih_data);
> > +		/* Verify the new value visible in BH handler */
> > +		smp_wmb();
> > +		if (!schedule_work(&adev->gfx.sq_work.work)) {
> > +
> > +			WRITE_ONCE(adev->gfx.sq_work.ih_data, 0);
> > +			gfx_v8_0_parse_sq_irq(adev, ih_data);
> > +			/* Verify 0 is visible to next HW IH */
> > +			smp_wmb();
> > +		}
> 
> At least of hand that looks like it is racy.
> 
> You should probably rather use work_pending() like this:
> 
> if (work_pending(&adev->gfx.sq_work.work)) {
>      gfx_v8_0_parse_sq_irq(adev, ih_data); } else {
>      adev->gfx.sq_work.ih_data = ih_data;
>      schedule_work(&adev->gfx.sq_work.work);
> }
> 
> Regards,
> Christian.

Agree that this way the code more compact and elegant but where is the race ?

Andrey

> 
> >   	}
> >
> >   	return 0;