[PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB
Alex Deucher
alexdeucher at gmail.com
Wed Apr 5 18:42:11 UTC 2017
On Wed, Apr 5, 2017 at 12:21 PM, Christian König
<deathsimple at vodafone.de> wrote:
> From: Christian König <christian.koenig at amd.com>
>
> Drop invalidating both hubs from each engine.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 +++++------
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 60 +++++++++---------
> drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 111 +++++++++++++++------------------
> drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 57 ++++++++---------
> 4 files changed, 118 insertions(+), 146 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 1cc006a..dce2950 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3147,35 +3147,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
> static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> unsigned vm_id, uint64_t pd_addr)
> {
> + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB];
Should we use ring->vmhub here rather than hardcoding
AMDGPU_GFXHUB/AMDGPU_MMHUB? Same question for all the other IP blocks below.
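Something like the following is what I have in mind (just a sketch; it
assumes a per-ring hub index, e.g. a ring->funcs->vmhub field, which
would still need to be added somewhere in this series):

    /* look up the hub this ring actually uses instead of hardcoding it */
    struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

That way gfx/compute rings pick AMDGPU_GFXHUB and SDMA/UVD/VCE pick
AMDGPU_MMHUB without each emit_vm_flush duplicating the constant.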
With that fixed:
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> unsigned eng = ring->idx;
> - unsigned i;
>
> pd_addr = pd_addr | 0x1; /* valid bit */
> /* now only use physical base address of PDE and valid */
> BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> - gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> - hub->ctx0_ptb_addr_lo32
> - + (2 * vm_id),
> - lower_32_bits(pd_addr));
> + gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> + hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
> + lower_32_bits(pd_addr));
>
> - gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> - hub->ctx0_ptb_addr_hi32
> - + (2 * vm_id),
> - upper_32_bits(pd_addr));
> + gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> + hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
> + upper_32_bits(pd_addr));
>
> - gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> - hub->vm_inv_eng0_req + eng, req);
> + gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> + hub->vm_inv_eng0_req + eng, req);
>
> - /* wait for the invalidate to complete */
> - gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
> - eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
> - }
> + /* wait for the invalidate to complete */
> + gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
> + eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
>
> /* compute doesn't have PFP */
> if (usepfp) {
> @@ -3680,7 +3674,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
> .emit_frame_size = /* totally 242 maximum if 16 IBs */
> 5 + /* COND_EXEC */
> 7 + /* PIPELINE_SYNC */
> - 46 + /* VM_FLUSH */
> + 24 + /* VM_FLUSH */
> 8 + /* FENCE for VM_FLUSH */
> 20 + /* GDS switch */
> 4 + /* double SWITCH_BUFFER,
> @@ -3727,7 +3721,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
> 7 + /* gfx_v9_0_ring_emit_hdp_flush */
> 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> - 64 + /* gfx_v9_0_ring_emit_vm_flush */
> + 24 + /* gfx_v9_0_ring_emit_vm_flush */
> 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
> .emit_ib = gfx_v9_0_ring_emit_ib_compute,
> @@ -3757,7 +3751,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
> 7 + /* gfx_v9_0_ring_emit_hdp_flush */
> 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> - 64 + /* gfx_v9_0_ring_emit_vm_flush */
> + 24 + /* gfx_v9_0_ring_emit_vm_flush */
> 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
> .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
> .emit_ib = gfx_v9_0_ring_emit_ib_compute,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 8cbb49d..06826a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1039,44 +1039,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
> static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> unsigned vm_id, uint64_t pd_addr)
> {
> + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
> uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> unsigned eng = ring->idx;
> - unsigned i;
>
> pd_addr = pd_addr | 0x1; /* valid bit */
> /* now only use physical base address of PDE and valid */
> BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
> - amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
> - amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> -
> - /* flush TLB */
> - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> - amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
> - amdgpu_ring_write(ring, req);
> -
> - /* wait for flush */
> - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
> - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
> - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
> - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, 1 << vm_id); /* reference */
> - amdgpu_ring_write(ring, 1 << vm_id); /* mask */
> - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
> - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
> - }
> + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
> + amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
> + amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> +
> + /* flush TLB */
> + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> + amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
> + amdgpu_ring_write(ring, req);
> +
> + /* wait for flush */
> + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
> + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
> + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
> + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> + amdgpu_ring_write(ring, 0);
> + amdgpu_ring_write(ring, 1 << vm_id); /* reference */
> + amdgpu_ring_write(ring, 1 << vm_id); /* mask */
> + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
> + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
> }
>
> static int sdma_v4_0_early_init(void *handle)
> @@ -1481,7 +1477,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
> 6 + /* sdma_v4_0_ring_emit_hdp_flush */
> 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */
> 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
> - 36 + /* sdma_v4_0_ring_emit_vm_flush */
> + 18 + /* sdma_v4_0_ring_emit_vm_flush */
> 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
> .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
> .emit_ib = sdma_v4_0_ring_emit_ib,
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index fa80465..772c0f2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -1034,42 +1034,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
> static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> unsigned vm_id, uint64_t pd_addr)
> {
> + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
> uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> uint32_t data0, data1, mask;
> unsigned eng = ring->idx;
> - unsigned i;
>
> pd_addr = pd_addr | 0x1; /* valid bit */
> /* now only use physical base address of PDE and valid */
> BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
> - data1 = upper_32_bits(pd_addr);
> - uvd_v7_0_vm_reg_write(ring, data0, data1);
> -
> - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
> - data1 = lower_32_bits(pd_addr);
> - uvd_v7_0_vm_reg_write(ring, data0, data1);
> -
> - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
> - data1 = lower_32_bits(pd_addr);
> - mask = 0xffffffff;
> - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> -
> - /* flush TLB */
> - data0 = (hub->vm_inv_eng0_req + eng) << 2;
> - data1 = req;
> - uvd_v7_0_vm_reg_write(ring, data0, data1);
> -
> - /* wait for flush */
> - data0 = (hub->vm_inv_eng0_ack + eng) << 2;
> - data1 = 1 << vm_id;
> - mask = 1 << vm_id;
> - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> - }
> + data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
> + data1 = upper_32_bits(pd_addr);
> + uvd_v7_0_vm_reg_write(ring, data0, data1);
> +
> + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
> + data1 = lower_32_bits(pd_addr);
> + uvd_v7_0_vm_reg_write(ring, data0, data1);
> +
> + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
> + data1 = lower_32_bits(pd_addr);
> + mask = 0xffffffff;
> + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> +
> + /* flush TLB */
> + data0 = (hub->vm_inv_eng0_req + eng) << 2;
> + data1 = req;
> + uvd_v7_0_vm_reg_write(ring, data0, data1);
> +
> + /* wait for flush */
> + data0 = (hub->vm_inv_eng0_ack + eng) << 2;
> + data1 = 1 << vm_id;
> + mask = 1 << vm_id;
> + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> }
>
> static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
> @@ -1080,44 +1076,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
> static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
> unsigned int vm_id, uint64_t pd_addr)
> {
> + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
> uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> unsigned eng = ring->idx;
> - unsigned i;
>
> pd_addr = pd_addr | 0x1; /* valid bit */
> /* now only use physical base address of PDE and valid */
> BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> - amdgpu_ring_write(ring,
> - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> - amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> -
> - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> - amdgpu_ring_write(ring,
> - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> - amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> - amdgpu_ring_write(ring,
> - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> - amdgpu_ring_write(ring, 0xffffffff);
> - amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> - /* flush TLB */
> - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
> - amdgpu_ring_write(ring, req);
> -
> - /* wait for flush */
> - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> - amdgpu_ring_write(ring, 1 << vm_id);
> - amdgpu_ring_write(ring, 1 << vm_id);
> - }
> + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> + amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> +
> + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> + amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> + amdgpu_ring_write(ring, 0xffffffff);
> + amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> + /* flush TLB */
> + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
> + amdgpu_ring_write(ring, req);
> +
> + /* wait for flush */
> + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> + amdgpu_ring_write(ring, 1 << vm_id);
> + amdgpu_ring_write(ring, 1 << vm_id);
> }
>
> #if 0
> @@ -1455,7 +1444,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
> .emit_frame_size =
> 2 + /* uvd_v7_0_ring_emit_hdp_flush */
> 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */
> - 34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */
> + 34 + /* uvd_v7_0_ring_emit_vm_flush */
> 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
> .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
> .emit_ib = uvd_v7_0_ring_emit_ib,
> @@ -1481,7 +1470,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
> .get_wptr = uvd_v7_0_enc_ring_get_wptr,
> .set_wptr = uvd_v7_0_enc_ring_set_wptr,
> .emit_frame_size =
> - 17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */
> + 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */
> 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
> 1, /* uvd_v7_0_enc_ring_insert_end */
> .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 6374133..5e4f243 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -973,44 +973,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
> static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
> unsigned int vm_id, uint64_t pd_addr)
> {
> + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
> uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> unsigned eng = ring->idx;
> - unsigned i;
>
> pd_addr = pd_addr | 0x1; /* valid bit */
> /* now only use physical base address of PDE and valid */
> BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> - amdgpu_ring_write(ring,
> - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> - amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> -
> - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> - amdgpu_ring_write(ring,
> - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> - amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> - amdgpu_ring_write(ring,
> - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> - amdgpu_ring_write(ring, 0xffffffff);
> - amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> - /* flush TLB */
> - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
> - amdgpu_ring_write(ring, req);
> -
> - /* wait for flush */
> - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> - amdgpu_ring_write(ring, 1 << vm_id);
> - amdgpu_ring_write(ring, 1 << vm_id);
> - }
> + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> + amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> +
> + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> + amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> + amdgpu_ring_write(ring, 0xffffffff);
> + amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> + /* flush TLB */
> + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
> + amdgpu_ring_write(ring, req);
> +
> + /* wait for flush */
> + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> + amdgpu_ring_write(ring, 1 << vm_id);
> + amdgpu_ring_write(ring, 1 << vm_id);
> }
>
> static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
> @@ -1080,7 +1073,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
> .set_wptr = vce_v4_0_ring_set_wptr,
> .parse_cs = amdgpu_vce_ring_parse_cs_vm,
> .emit_frame_size =
> - 17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
> + 17 + /* vce_v4_0_emit_vm_flush */
> 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
> 1, /* vce_v4_0_ring_insert_end */
> .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
> --
> 2.5.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx