[PATCH v4 1/2] xe/oa: Fix query mode of operation for OAR/OAC
Dixit, Ashutosh
ashutosh.dixit at intel.com
Fri Dec 20 03:47:44 UTC 2024
On Thu, 19 Dec 2024 16:22:33 -0800, Umesh Nerlige Ramappa wrote:
>
Hi Umesh,
> +static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count)
> {
> struct dma_fence *fence;
> struct xe_bb *bb;
> int err;
>
> - bb = xe_bb_new(stream->gt, 4 * count, false);
> + bb = xe_bb_new(stream->gt, 2 * count + 1, false);
> if (IS_ERR(bb)) {
> err = PTR_ERR(bb);
> goto exit;
> }
>
> - xe_oa_store_flex(stream, lrc, bb, flex, count);
> + write_cs_mi_lri(bb, reg_lri, count);
>
> - fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
> - if (IS_ERR(fence)) {
> - err = PTR_ERR(fence);
> - goto free_bb;
> - }
> - xe_bb_free(bb, fence);
> - dma_fence_put(fence);
> -
> - return 0;
> -free_bb:
> - xe_bb_free(bb, NULL);
> -exit:
> - return err;
> -}
> -
> -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
> -{
> - struct dma_fence *fence;
> - struct xe_bb *bb;
> - int err;
> -
> - bb = xe_bb_new(stream->gt, 3, false);
> - if (IS_ERR(bb)) {
> - err = PTR_ERR(bb);
> - goto exit;
> - }
> -
> - write_cs_mi_lri(bb, reg_lri, 1);
> -
> - fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
> + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, bb);
This looks like a copy-paste error, could you please change this back to
XE_OA_SUBMIT_NO_DEPS as it used to be.
> if (IS_ERR(fence)) {
> err = PTR_ERR(fence);
> goto free_bb;
> @@ -762,71 +732,57 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
> static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
> {
> const struct xe_oa_format *format = stream->oa_buffer.format;
> - struct xe_lrc *lrc = stream->exec_q->lrc[0];
> - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
> u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
> (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
>
> - struct flex regs_context[] = {
> + struct xe_oa_reg reg_lri[] = {
> {
> OACTXCONTROL(stream->hwe->mmio_base),
> - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
> enable ? OA_COUNTER_RESUME : 0,
> },
> + {
> + OAR_OACONTROL,
> + oacontrol,
> + },
> {
> RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
> - regs_offset + CTX_CONTEXT_CONTROL,
> - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE),
> + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
> + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0)
> },
> };
> - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol };
> - int err;
> -
> - /* Modify stream hwe context image with regs_context */
> - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
> - regs_context, ARRAY_SIZE(regs_context));
> - if (err)
> - return err;
>
> /* Apply reg_lri using LRI */
This comment only made sense when we had xe_oa_modify_ctx_image, maybe
delete it now.
> - return xe_oa_load_with_lri(stream, ®_lri);
> + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
> }
>
> static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
> {
> const struct xe_oa_format *format = stream->oa_buffer.format;
> - struct xe_lrc *lrc = stream->exec_q->lrc[0];
> - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
> u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
> (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
> - struct flex regs_context[] = {
> + struct xe_oa_reg reg_lri[] = {
> {
> OACTXCONTROL(stream->hwe->mmio_base),
> - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
> enable ? OA_COUNTER_RESUME : 0,
> },
> + {
> + OAC_OACONTROL,
> + oacontrol
> + },
> {
> RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
> - regs_offset + CTX_CONTEXT_CONTROL,
> - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) |
> + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
> + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
> _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0),
> },
> };
> - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol };
> - int err;
>
> /* Set ccs select to enable programming of OAC_OACONTROL */
> xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl,
> __oa_ccs_select(stream));
>
> - /* Modify stream hwe context image with regs_context */
> - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
> - regs_context, ARRAY_SIZE(regs_context));
> - if (err)
> - return err;
> -
> /* Apply reg_lri using LRI */
Same for this comment.
> - return xe_oa_load_with_lri(stream, ®_lri);
> + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
> }
>
> static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable)
> @@ -2110,8 +2066,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
> if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
> return -ENOENT;
>
> - if (param.exec_q->width > 1)
> - drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n");
> + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1))
> + return -EOPNOTSUPP;
> }
>
> /*
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index 3a75a08b6be9..c8ab37fa0d19 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -223,7 +223,10 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
>
> static u32 get_ppgtt_flag(struct xe_sched_job *job)
> {
> - return job->q->vm ? BIT(8) : 0;
> + if (job->q->vm && !job->ggtt)
> + return BIT(8);
> +
> + return 0;
> }
>
> static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
> diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
> index f13f333f00be..d942b20a9f29 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> @@ -56,6 +56,8 @@ struct xe_sched_job {
> u32 migrate_flush_flags;
> /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
> bool ring_ops_flush_tlb;
> + /** @ggtt: mapped in ggtt. */
> + bool ggtt;
> /** @ptrs: per instance pointers. */
> struct xe_job_ptrs ptrs[];
> };
> --
> 2.34.1
>
After making the above changes, this is now:
Reviewed-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
If you don't want to send a new version, I think these changes can just be
made while merging.
More information about the Intel-xe
mailing list