[PATCH 12/23] drm/amdgpu: Insert meta data during submitting IB
Alex Deucher
alexdeucher at gmail.com
Mon Dec 19 23:23:03 UTC 2016
On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu at amd.com> wrote:
> Virtualization world switch need each command that is submitted
> into GFX with an extra entry, which will using WRITE_DATA to
> fullfill CSA. In this way, CP will save CE/DE snapshots when
> preemption occurred and restore it later.
>
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> Signed-off-by: Xiangliang Yu <Xiangliang.Yu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 +
> drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h | 39 ++++++++++++++
> drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c | 88 ++++++++++++++++++++++++++++++++
> 4 files changed, 131 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index acf48de..cc35255 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -175,6 +175,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> if (ring->funcs->emit_hdp_flush)
> amdgpu_ring_emit_hdp_flush(ring);
>
> + amdgpu_gfx_ring_emit_meta_data(ring, vm);
> +
> /* always set cond_exec_polling to CONTINUE */
> *ring->cond_exe_cpu_addr = 1;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index dff1248..d6f57a2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -69,4 +69,6 @@ int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
> /* Context Save Area functions */
> int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> +void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
> + struct amdgpu_vm *vm);
> #endif
> diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> index a25e07f..cc3123b 100644
> --- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> +++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> @@ -26,9 +26,48 @@
>
> #include "amdgpu.h"
>
> +/* context save area structures */
> +struct amdgpu_ce_ib_state {
> + uint32_t ce_ib_completion_status;
> + uint32_t ce_const_engine_count;
> + uint32_t ce_ib_offset_ib1;
> + uint32_t ce_ib_offset_ib2;
> +};
> +
> +struct amdgpu_de_ib_state {
> + uint32_t de_ib_completion_status;
> + uint32_t de_const_engine_count;
> + uint32_t de_ib_offset_ib1;
> + uint32_t de_ib_offset_ib2;
> + uint32_t preamble_begin_ib1;
> + uint32_t preamble_begin_ib2;
> + uint32_t preamble_end_ib1;
> + uint32_t preamble_end_ib2;
> + uint32_t draw_indirect_base_lo;
> + uint32_t draw_indirect_base_hi;
> + uint32_t disp_indirect_base_lo;
> + uint32_t disp_indirect_base_hi;
> + uint32_t gds_backup_addr_lo;
> + uint32_t gds_backup_addr_hi;
> + uint32_t index_base_addr_lo;
> + uint32_t index_base_addr_hi;
> + uint32_t sample_cntl;
> +};
> +
> +struct amdgpu_gfx_meta_data {
> + struct amdgpu_ce_ib_state ce_payload;
> + uint32_t reserved1[60];
> + struct amdgpu_de_ib_state de_payload;
> + uint32_t de_ib_base_addr_lo;
> + uint32_t de_ib_base_addr_hi;
> + uint32_t reserved2[941];
> +};
> +
These are gfx8 specific and should be moved to gfx8 module.
> /* xgpu structures */
> struct amd_xgpu_csa {
> struct amdgpu_bo *robj;
> + struct amdgpu_ce_ib_state ce_payload;
> + struct amdgpu_de_ib_state de_payload;
> uint64_t gpu_addr;
> uint64_t gds_addr;
> int32_t size;
> diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> index 246a747..6d4246c 100644
> --- a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> +++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> @@ -207,3 +207,91 @@ void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> sa = &xgpu->sa;
> xgpu_vm_unmap_csa(adev, vm, sa);
> }
> +
> +static void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
> + void *src, int count_dw)
> +{
> + if (ring->count_dw < count_dw)
> + DRM_ERROR("writing more dwords to the ring than expected:%d.\n",
> + count_dw);
> + else {
> + unsigned int chunk1, chunk2;
> + void *dst = (void *)&ring->ring[ring->wptr];
> +
> + chunk1 = ring->ptr_mask + 1 - ring->wptr;
> + chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
> + chunk2 = count_dw - chunk1;
> + chunk1 <<= 2;
> + chunk2 <<= 2;
> + if (chunk1) {
> + memcpy(dst, src, chunk1);
> + dst = (void *)(((uint64_t)dst + chunk1) &
> + ring->ptr_mask);
> + }
> +
> + if (chunk2) {
> + src += chunk1;
> + dst = (void *)ring->ring;
> + memcpy(dst, src, chunk2);
> + }
> +
> + ring->wptr += count_dw;
> + ring->wptr &= ring->ptr_mask;
> + ring->count_dw -= count_dw;
> + }
> +}
> +
> +void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
> + struct amdgpu_vm *vm)
> +{
> + struct amdgpu_ce_ib_state *ce_payload;
> + struct amdgpu_de_ib_state *de_payload;
> + struct amd_xgpu_csa *sa = NULL;
> + struct amd_xgpu *xgpu = (struct amd_xgpu *)ring->adev->priv_data;
> + uint64_t csa_addr, gds_addr;
> + int cnt;
> +
> + if (!xgpu || (ring->funcs->type != AMDGPU_RING_TYPE_GFX))
> + return;
> +
> + sa = &xgpu->sa;
No need to make this dependent on xgpu. As I said in the previous
patch preemption is useful independent of xgpu.
> +
> + ce_payload = &sa->ce_payload;
> + de_payload = &sa->de_payload;
> + memset(ce_payload, 0, sizeof(*ce_payload));
> + memset(de_payload, 0, sizeof(*de_payload));
> +
> + cnt = (sizeof(*ce_payload) >> 2) + 4 - 2;
> + csa_addr = vm ? vm->csa.csa_addr : sa->gpu_addr;
> + gds_addr = vm ? vm->csa.gds_addr : sa->gds_addr;
> + de_payload->gds_backup_addr_lo = lower_32_bits(gds_addr);
> + de_payload->gds_backup_addr_hi = upper_32_bits(gds_addr);
> +
> + /* write CE meta data */
> + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
> + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(2) |
> + WRITE_DATA_DST_SEL(8) |
> + WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
> + amdgpu_ring_write(ring, lower_32_bits(csa_addr +
> + offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
> + amdgpu_ring_write(ring, upper_32_bits(csa_addr +
> + offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
> +
> + amdgpu_ring_write_multiple(ring, (void *)ce_payload,
> + sizeof(*ce_payload) >> 2);
> +
> + /* write DE meta data */
> + cnt = (sizeof(*de_payload) >> 2) + 4 - 2;
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
> + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(1) |
> + WRITE_DATA_DST_SEL(8) |
> + WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
> + amdgpu_ring_write(ring, lower_32_bits(csa_addr +
> + offsetof(struct amdgpu_gfx_meta_data, de_payload)));
> + amdgpu_ring_write(ring, upper_32_bits(csa_addr +
> + offsetof(struct amdgpu_gfx_meta_data, de_payload)));
> +
> + amdgpu_ring_write_multiple(ring, (void *)de_payload,
> + sizeof(*de_payload) >> 2);
> +}
This function is gfx8 specific and should be moved to the gfx8 module.
Alex
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list