[PATCH 2/2] xe/oa: Use MI_LOAD_REGISTER_IMMEDIATE to enable OAR/OAC

Dixit, Ashutosh ashutosh.dixit at intel.com
Thu Dec 5 02:37:02 UTC 2024


On Wed, 04 Dec 2024 13:05:41 -0800, Matthew Brost wrote:
>

Hi Umesh,

> On Tue, Nov 26, 2024 at 04:31:27PM -0800, Umesh Nerlige Ramappa wrote:
> > To enable OAR/OAC, a bit in RING_CONTEXT_CONTROL needs to be set.
> > Setting this bit causes the context image size to change and, if not
> > done correctly, can cause undesired hangs.
> >
> > The current code uses a separate exec_queue to modify this bit, which
> > is error-prone. As per the HW recommendation, submit
> > MI_LOAD_REGISTER_IMM to the target hardware context to modify the
> > relevant bit.
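(As background, write_cs_mi_lri() expands the register/value pairs into an
LRI payload in the batch. A rough sketch, not the exact driver code:

	/* One MI_LOAD_REGISTER_IMM header, then an addr/value dword pair per register */
	bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
	for (i = 0; i < count; i++) {
		bb->cs[bb->len++] = reg_lri[i].addr.addr;
		bb->cs[bb->len++] = reg_lri[i].value;
	}

which is also why the batch below is sized 2 * count + 1 dwords.)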

Looks like Matt is fine with the overall approach of this series, and the
locking here should also be ok after the suggested changes. I can review
the OA-specific stuff. A couple of comments below.

> >
> > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_oa.c | 68 ++++++++++++++++++++++++++++----------
> >  1 file changed, 50 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > index 8dd55798ab31..73c07fee6409 100644
> > --- a/drivers/gpu/drm/xe/xe_oa.c
> > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > @@ -719,27 +719,55 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
> >	return err;
> >  }
> >
> > -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
> > +static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count)
> >  {
> > +	struct xe_exec_queue *q = stream->exec_q;

The two OAR programming batches are followed by another batch that
configures the OA registers, and we fence only on the last batch. The
assumption is that all batches are submitted to the same exec queue, so
fencing on the last one is sufficient.

So we probably need to send all batches on either the kernel exec_q (when
a user exec_q is not available) or the user exec_q (when one is
available).

We'll need to see if there's any reason we cannot do it and then revisit.
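
Roughly, and assuming the kernel queue is stream->k_exec_q as elsewhere in
xe_oa.c (untested):

	/* Use a single queue for all OA batches so fencing on the last one suffices */
	struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q;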

> > +	struct xe_sched_job *job;
> >	struct dma_fence *fence;
> >	struct xe_bb *bb;
> > +	long timeout;
> >	int err;
> >
> > -	bb = xe_bb_new(stream->gt, 3, false);
> > +	bb = xe_bb_new(stream->gt, 2 * count + 1, false);
> >	if (IS_ERR(bb)) {
> >		err = PTR_ERR(bb);
> >		goto exit;
> >	}
> >
> > -	write_cs_mi_lri(bb, reg_lri, 1);
> > +	write_cs_mi_lri(bb, reg_lri, count);
> >
> > -	fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
> > -	if (IS_ERR(fence)) {
> > -		err = PTR_ERR(fence);
> > +	if (q->vm) {
> > +		down_read(&q->vm->lock);
> > +		xe_vm_lock(q->vm, false);
> > +	}
> > +
> > +	job = xe_bb_create_job(q, bb);
> > +	job->ggtt = true;
>
> Need to set this after the IS_ERR(job) check.
>
> > +	if (IS_ERR(job)) {
> > +		err = PTR_ERR(job);
> >		goto free_bb;
>
> You need to unlock the VM here.
>
> Matt
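
Right -- something like this untested sketch should cover both points
(err_unlock is a new label, added before the existing free_bb cleanup):

	job = xe_bb_create_job(q, bb);
	if (IS_ERR(job)) {
		err = PTR_ERR(job);
		goto err_unlock;
	}
	/* Only dereference job after the IS_ERR() check */
	job->ggtt = true;
	...
err_unlock:
	if (q->vm) {
		xe_vm_unlock(q->vm);
		up_read(&q->vm->lock);
	}
	/* fall through to the existing free_bb cleanup */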
>
> >	}
> > -	xe_bb_free(bb, fence);
> > +
> > +	xe_sched_job_arm(job);
> > +	fence = dma_fence_get(&job->drm.s_fence->finished);
> > +	xe_sched_job_push(job);
> > +
> > +	if (q->vm) {
> > +		xe_vm_unlock(q->vm);
> > +		up_read(&q->vm->lock);
> > +	}
> > +
> > +	timeout = dma_fence_wait_timeout(fence, false, HZ);

The code should not block here; it should just push the job and move
on. That keeps the code completely non-blocking when output fences are
specified at stream open, and the fences signal correctly by design. See
the calls to xe_oa_submit_bb().
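
I.e., keep the previous non-blocking pattern (essentially the code this
patch removes above), just with the larger batch:

	fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto free_bb;
	}
	/* xe_bb_free() defers the actual free until the fence signals */
	xe_bb_free(bb, fence);
	dma_fence_put(fence);

	return 0;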

> >	dma_fence_put(fence);
> > +	xe_bb_free(bb, fence);
> > +
> > +	if (timeout < 0) {
> > +		drm_dbg(&stream->oa->xe->drm, "OA load with LRI failed %ld\n", timeout);
> > +		return timeout;
> > +	} else if (!timeout) {
> > +		drm_dbg(&stream->oa->xe->drm, "OA load with LRI timed out\n");
> > +		return -ETIME;
> > +	}
> >
> >	return 0;
> >  free_bb:
> > @@ -751,8 +779,6 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
> >  static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
> >  {
> >	const struct xe_oa_format *format = stream->oa_buffer.format;
> > -	struct xe_lrc *lrc = stream->exec_q->lrc[0];
> > -	u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
> >	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
> >		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
> >
> > @@ -762,13 +788,17 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
> >			stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
> >			enable ? OA_COUNTER_RESUME : 0,
> >		},
> > +	};
> > +	struct xe_oa_reg reg_lri[] = {
> > +		{
> > +			OAR_OACONTROL,
> > +			oacontrol,
> > +		},
> >		{
> >			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
> > -			regs_offset + CTX_CONTEXT_CONTROL,
> >			_MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE),
> >		},
> >	};
> > -	struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol };
> >	int err;
> >
> >	/* Modify stream hwe context image with regs_context */
> > @@ -778,14 +808,12 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
> >		return err;
> >
> >	/* Apply reg_lri using LRI */
> > -	return xe_oa_load_with_lri(stream, &reg_lri);
> > +	return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
> >  }
> >
> >  static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
> >  {
> >	const struct xe_oa_format *format = stream->oa_buffer.format;
> > -	struct xe_lrc *lrc = stream->exec_q->lrc[0];
> > -	u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
> >	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
> >		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
> >	struct flex regs_context[] = {
> > @@ -794,14 +822,18 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
> >			stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
> >			enable ? OA_COUNTER_RESUME : 0,
> >		},
> > +	};
> > +	struct xe_oa_reg reg_lri[] = {
> > +		{
> > +			OAC_OACONTROL,
> > +			oacontrol
> > +		},
> >		{
> >			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
> > -			regs_offset + CTX_CONTEXT_CONTROL,
> >			_MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) |
> >			_MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0),
> >		},
> > -	};
> > -	struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol };
> > +		};
> >	int err;
> >
> >	/* Set ccs select to enable programming of OAC_OACONTROL */
> > @@ -815,7 +847,7 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
> >		return err;
> >
> >	/* Apply reg_lri using LRI */
> > -	return xe_oa_load_with_lri(stream, &reg_lri);
> > +	return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
> >  }
> >
> >  static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable)
> > --
> > 2.34.1
> >

Thanks.
--
Ashutosh

