[PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
Deucher, Alexander
Alexander.Deucher at amd.com
Mon Mar 27 15:59:57 UTC 2017
> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:39 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
>
> 1) Adapt to Vulkan:
> Now use a double SWITCH_BUFFER to replace the 128-NOP w/a,
> because since Vulkan was introduced the UMD can insert 7~16 IBs
> per submit, so a 256 DW allocation can no longer hold the whole
> DMAframe if we still insert those 128 NOPs. The CP team suggests
> using double SWITCH_BUFFERs instead of the tricky 128-NOP w/a.
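For anyone else reading along: SWITCH_BUFFER is a 2-DW PM4 packet, so the double emission only costs 4 DW of the frame (see the emit_frame_size entry further down). A minimal sketch of what each emission amounts to, assuming the gfx9 callback follows the existing gfx8-style emit_sb helper (the function name here is just illustrative, not from this patch):

	/* Sketch only: assumes SWITCH_BUFFER is a 2-DW PM4 packet as on gfx8;
	 * emitted twice back to back it takes over the role of the old
	 * 128-NOP padding after the VM flush.
	 */
	static void gfx_v9_sketch_emit_switch_buffer(struct amdgpu_ring *ring)
	{
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}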
>
> 2) Fix the CE VM fault issue introduced with MCBP:
> We need one more COND_EXEC wrapping the IB part (the original
> one is for the VM switch part).
>
> This change fixes the VM fault caused by the following scenario,
> which occurs without it:
>
> >CE passes the original COND_EXEC (no MCBP issued at this moment)
> and proceeds as normal.
>
> >DE catches up to this COND_EXEC, but by now MCBP has been issued,
> so DE treats all following packets as NOPs. The following
> VM switch packets now look like NOPs to DE, so DE
> doesn't do the VM flush at all.
>
> >Now CE proceeds to the first IBc and triggers a VM fault,
> because DE didn't do the VM flush for this DMAframe.
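If I'm reading the fix right, the frame now carries two COND_EXEC regions and the double SWITCH_BUFFER sits between them, where DE can never skip it. A rough sketch of the resulting DMAframe layout, pieced together from this patch and the emit_frame_size comment below (labels only, not actual packet code):

	/*
	 * COND_EXEC #1            guards the VM-switch part
	 *   PIPELINE_SYNC
	 *   VM_FLUSH + fence
	 *   GDS switch
	 * SWITCH_BUFFER x2        never skipped; COND_EXEC #1 lands just before it
	 * COND_EXEC #2            new in this patch, guards the IB part
	 *   HDP flush, CE/DE metadata, context control
	 *   IBs (7~16 with Vulkan), HDP invalidate, fences
	 */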
>
> 3) Change the estimated allocation size for gfx9:
> with the new DMAframe scheme we need to modify emit_frame_size
> for gfx9.
>
> With the above changes, the 128-NOP w/a after the VM flush is no
> longer needed.
>
> Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
I haven't really kept up on this whole saga with the DE and CE, so assuming there are no regressions on bare metal, patches 12 and 13 are:
Acked-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 +++++++++++++++++++++-------------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 29 ++++++++-----
> 3 files changed, 69 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d103270..b300929 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> return r;
> }
>
> - if (ring->funcs->init_cond_exec)
> - patch_offset = amdgpu_ring_init_cond_exec(ring);
> -
> if (vm) {
> amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */
>
> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> }
> }
>
> - if (ring->funcs->emit_hdp_flush
> + if (ring->funcs->init_cond_exec)
> + patch_offset = amdgpu_ring_init_cond_exec(ring);
> +
> + if (ring->funcs->emit_hdp_flush
> #ifdef CONFIG_X86_64
> && !(adev->flags & AMD_IS_APU)
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9ff445c..74be4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
> id->oa_size != job->oa_size);
> int r;
>
> - if (ring->funcs->emit_pipeline_sync && (
> - job->vm_needs_flush || gds_switch_needed ||
> - amdgpu_vm_ring_has_compute_vm_bug(ring)))
> - amdgpu_ring_emit_pipeline_sync(ring);
> + if (job->vm_needs_flush || gds_switch_needed ||
> + amdgpu_vm_is_gpu_reset(adev, id) ||
> + amdgpu_vm_ring_has_compute_vm_bug(ring)) {
> + unsigned patch_offset = 0;
>
> - if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> - amdgpu_vm_is_gpu_reset(adev, id))) {
> - struct fence *fence;
> -	u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
> + if (ring->funcs->init_cond_exec)
> + patch_offset = amdgpu_ring_init_cond_exec(ring);
>
> - trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> - amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
> + if (ring->funcs->emit_pipeline_sync &&
> + (job->vm_needs_flush || gds_switch_needed ||
> + amdgpu_vm_ring_has_compute_vm_bug(ring)))
> + amdgpu_ring_emit_pipeline_sync(ring);
>
> - r = amdgpu_fence_emit(ring, &fence);
> - if (r)
> - return r;
> +		if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> +			amdgpu_vm_is_gpu_reset(adev, id))) {
> +			struct fence *fence;
> +			u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>
> - mutex_lock(&adev->vm_manager.lock);
> - fence_put(id->last_flush);
> - id->last_flush = fence;
> - mutex_unlock(&adev->vm_manager.lock);
> - }
> +			trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> +			amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>
> - if (gds_switch_needed) {
> - id->gds_base = job->gds_base;
> - id->gds_size = job->gds_size;
> - id->gws_base = job->gws_base;
> - id->gws_size = job->gws_size;
> - id->oa_base = job->oa_base;
> - id->oa_size = job->oa_size;
> - amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> - job->gds_base, job->gds_size,
> - job->gws_base, job->gws_size,
> - job->oa_base, job->oa_size);
> - }
> + r = amdgpu_fence_emit(ring, &fence);
> + if (r)
> + return r;
>
> + mutex_lock(&adev->vm_manager.lock);
> + fence_put(id->last_flush);
> + id->last_flush = fence;
> + mutex_unlock(&adev->vm_manager.lock);
> + }
> +
> + if (gds_switch_needed) {
> + id->gds_base = job->gds_base;
> + id->gds_size = job->gds_size;
> + id->gws_base = job->gws_base;
> + id->gws_size = job->gws_size;
> + id->oa_base = job->oa_base;
> + id->oa_size = job->oa_size;
> + amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> +				job->gds_base, job->gds_size,
> +				job->gws_base, job->gws_size,
> +				job->oa_base, job->oa_size);
> + }
> +
> + if (ring->funcs->patch_cond_exec)
> + amdgpu_ring_patch_cond_exec(ring, patch_offset);
> +
> +		/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
> + if (ring->funcs->emit_switch_buffer) {
> + amdgpu_ring_emit_switch_buffer(ring);
> + amdgpu_ring_emit_switch_buffer(ring);
> + }
> + }
> return 0;
> }
>
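Side note for reviewers less familiar with the COND_EXEC helpers used above, as I understand them (simplified, and nothing this patch changes): init_cond_exec emits the COND_EXEC packet with its skip-count DW left at zero and returns that DW's offset, and patch_cond_exec later writes the distance from that DW to the current write pointer, so a preempted DE skips exactly the bracketed packets:

	/* Rough sketch of the bracket around the VM-switch packets. */
	unsigned patch_offset = amdgpu_ring_init_cond_exec(ring);	/* skip count left 0 for now */
	/* ... pipeline sync, VM flush, GDS switch: skippable under MCBP ... */
	amdgpu_ring_patch_cond_exec(ring, patch_offset);		/* skip count roughly wptr - patch_offset */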
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ce6fa03..eb8551b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> /* sync PFP to ME, otherwise we might get invalid PFP reads */
> amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
> amdgpu_ring_write(ring, 0x0);
> -	/* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
> -	amdgpu_ring_insert_nop(ring, 128);
> }
> }
>
> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
> .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
> .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
> .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
> - .emit_frame_size =
> - 20 + /* gfx_v9_0_ring_emit_gds_switch */
> - 7 + /* gfx_v9_0_ring_emit_hdp_flush */
> - 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> -	8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> - 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> - 128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
> - 2 + /* gfx_v9_ring_emit_sb */
> - 3, /* gfx_v9_ring_emit_cntxcntl */
> + .emit_frame_size = /* totally 242 maximum if 16 IBs */
> + 5 + /* COND_EXEC */
> + 7 + /* PIPELINE_SYNC */
> + 46 + /* VM_FLUSH */
> + 8 + /* FENCE for VM_FLUSH */
> + 20 + /* GDS switch */
> +		4 + /* double SWITCH_BUFFER, the first COND_EXEC jump to the place just prior to this double SWITCH_BUFFER */
> + 5 + /* COND_EXEC */
> + 7 + /* HDP_flush */
> + 4 + /* VGT_flush */
> + 14 + /* CE_META */
> + 31 + /* DE_META */
> + 3 + /* CNTX_CTRL */
> + 5 + /* HDP_INVL */
> + 8 + 8 + /* FENCE x2 */
> + 2, /* SWITCH_BUFFER */
> .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
> .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
> .emit_fence = gfx_v9_0_ring_emit_fence,
> --
> 2.7.4
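Quick sanity check on the emit_frame_size numbers above, for anyone else adding them up: the fixed terms sum to 177 DW, and 16 IBs at emit_ib_size = 4 DW each add another 64 DW, i.e. 177 + 64 = 241 DW, which matches the "totally 242 maximum if 16 IBs" comment to within a dword.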
>