[PATCH 4/8] drm/amdgpu: rework job synchronization
Felix Kuehling
felix.kuehling at amd.com
Mon Jan 13 17:16:52 UTC 2020
On 2020-01-13 9:40 a.m., Christian König wrote:
> For unlocked page table updates we need to be able
> to sync to fences of a specific VM.
>
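Just to make sure I understand the intent: I assume AMDGPU_SYNC_EQ_OWNER is
what the unlocked updates will use later in the series, so that a page table
update only waits for fences whose owner is the specific VM being updated.
Something roughly like this hypothetical caller (variable names are only for
illustration, this is not part of the patch):

    /* hypothetical: only sync to fences owned by this particular VM */
    r = amdgpu_sync_resv(adev, &job->sync, root->tbo.base.resv,
                         AMDGPU_SYNC_EQ_OWNER, vm);

Is that the idea?
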
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 49 ++++++++++++-------
> drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 15 ++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 2 +-
> 8 files changed, 59 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index d8db5ecdf9c1..9e7889c28f3e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -848,9 +848,9 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
> vm_list_node) {
> struct amdgpu_bo *pd = peer_vm->root.base.bo;
>
> - ret = amdgpu_sync_resv(NULL,
> - sync, pd->tbo.base.resv,
> - AMDGPU_FENCE_OWNER_KFD, false);
> + ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
> + AMDGPU_SYNC_NE_OWNER,
> + AMDGPU_FENCE_OWNER_KFD);
> if (ret)
> return ret;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index cf79f30c0af6..d7b5efaa091c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -662,10 +662,12 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
> list_for_each_entry(e, &p->validated, tv.head) {
> struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> struct dma_resv *resv = bo->tbo.base.resv;
> + enum amdgpu_sync_mode sync_mode;
>
> - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, &fpriv->vm,
> - amdgpu_bo_explicit_sync(bo));
> -
> + sync_mode = amdgpu_bo_explicit_sync(bo) ?
> + AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
> + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
> + &fpriv->vm);
> if (r)
> return r;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index ca9f74585890..46c76e2e1281 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1419,7 +1419,8 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
> int r;
>
> amdgpu_sync_create(&sync);
> - amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, owner, false);
> + amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
> + AMDGPU_SYNC_NE_OWNER, owner);
> r = amdgpu_sync_wait(&sync, intr);
> amdgpu_sync_free(&sync);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index a09b6b9c27d1..c124f64e7aae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -202,18 +202,17 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> *
> * @sync: sync object to add fences from reservation object to
> * @resv: reservation object with embedded fence
> - * @explicit_sync: true if we should only sync to the exclusive fence
> + * @mode: how owner affects which fences we sync to
> + * @owner: owner of the planned job submission
> *
> * Sync to the fence
> */
> -int amdgpu_sync_resv(struct amdgpu_device *adev,
> - struct amdgpu_sync *sync,
> - struct dma_resv *resv,
> - void *owner, bool explicit_sync)
> +int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> + struct dma_resv *resv, enum amdgpu_sync_mode mode,
> + void *owner)
> {
> struct dma_resv_list *flist;
> struct dma_fence *f;
> - void *fence_owner;
> unsigned i;
> int r = 0;
>
> @@ -229,6 +228,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>  		return r;
>
>  	for (i = 0; i < flist->shared_count; ++i) {
> +		void *fence_owner;
> +
>  		f = rcu_dereference_protected(flist->shared[i],
>  					      dma_resv_held(resv));
>  		/* We only want to trigger KFD eviction fences on
> @@ -239,20 +240,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>  		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
>  			continue;
>
> -		if (amdgpu_sync_same_dev(adev, f)) {
> -			/* VM updates only sync with moves but not with user
> -			 * command submissions or KFD evictions fences
> -			 */
> -			if (owner == AMDGPU_FENCE_OWNER_VM &&
> -			    fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> +		/* VM updates only sync with moves but not with user
> +		 * command submissions or KFD evictions fences
> +		 */
> +		if (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
> +		    owner == AMDGPU_FENCE_OWNER_VM)
> +			continue;
> +
> +		/* Ignore fences depending on the sync mode */
> +		switch (mode) {
> +		case AMDGPU_SYNC_ALWAYS:
> +			break;
> +
> +		case AMDGPU_SYNC_NE_OWNER:
> +			if (amdgpu_sync_same_dev(adev, f) &&
> +			    fence_owner == owner)
>  				continue;
> +			break;
>
> -			/* Ignore fence from the same owner and explicit one as
> -			 * long as it isn't undefined.
> -			 */
> -			if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
> -			    (fence_owner == owner || explicit_sync))
> +		case AMDGPU_SYNC_EQ_OWNER:
> +			if (amdgpu_sync_same_dev(adev, f) &&
> +			    fence_owner != owner)
> +				continue;
> +			break;
> +
> +		case AMDGPU_SYNC_EXPLICIT:
> +			if (owner != AMDGPU_FENCE_OWNER_UNDEFINED)
>  				continue;
> +			break;
>  		}
>
>  		r = amdgpu_sync_fence(sync, f, false);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> index d62c2b81d92b..cfbe5788b8b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> @@ -31,6 +31,13 @@ struct dma_resv;
> struct amdgpu_device;
> struct amdgpu_ring;
>
> +enum amdgpu_sync_mode {
> +	AMDGPU_SYNC_ALWAYS,
> +	AMDGPU_SYNC_NE_OWNER,
> +	AMDGPU_SYNC_EQ_OWNER,
> +	AMDGPU_SYNC_EXPLICIT
> +};
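
If I read the switch in amdgpu_sync_resv() above correctly, the four modes
boil down to this (ignoring the KFD eviction fence and VM owner checks that
happen before the switch). This is just me restating my understanding as
comments, not something in the patch:

    enum amdgpu_sync_mode {
        AMDGPU_SYNC_ALWAYS,   /* sync to every shared fence */
        AMDGPU_SYNC_NE_OWNER, /* skip same-device fences with the same owner */
        AMDGPU_SYNC_EQ_OWNER, /* skip same-device fences with a different owner */
        AMDGPU_SYNC_EXPLICIT  /* skip all shared fences unless owner is undefined */
    };

A short kerneldoc comment on the enum along those lines would be nice to have.
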
> +
> /*
> * Container for fences used to sync command submissions.
> */
> @@ -43,11 +50,9 @@ void amdgpu_sync_create(struct amdgpu_sync *sync);
> int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
> bool explicit);
> int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
> -int amdgpu_sync_resv(struct amdgpu_device *adev,
> - struct amdgpu_sync *sync,
> - struct dma_resv *resv,
> - void *owner,
> - bool explicit_sync);
> +int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> + struct dma_resv *resv, enum amdgpu_sync_mode mode,
> + void *owner);
> struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
> struct amdgpu_ring *ring);
> struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index a3012edffd7a..ae1b00def5d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2080,8 +2080,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> }
> if (resv) {
> r = amdgpu_sync_resv(adev, &job->sync, resv,
> - AMDGPU_FENCE_OWNER_UNDEFINED,
> - false);
> + AMDGPU_SYNC_ALWAYS,
> + AMDGPU_FENCE_OWNER_UNDEFINED);
> if (r) {
> DRM_ERROR("sync failed (%d).\n", r);
> goto error_free;
> @@ -2165,7 +2165,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>
> if (resv) {
> r = amdgpu_sync_resv(adev, &job->sync, resv,
> - AMDGPU_FENCE_OWNER_UNDEFINED, false);
> + AMDGPU_SYNC_ALWAYS,
> + AMDGPU_FENCE_OWNER_UNDEFINED);
> if (r) {
> DRM_ERROR("sync failed (%d).\n", r);
> goto error_free;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index a92f3b18e657..11e8bec6582b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -1099,7 +1099,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
> goto err_free;
> } else {
> r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
> - AMDGPU_FENCE_OWNER_UNDEFINED, false);
> + AMDGPU_SYNC_NE_OWNER,
Should this be AMDGPU_SYNC_ALWAYS? I think that's closer to the original
behaviour.
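To spell out why: the old call here passed owner = AMDGPU_FENCE_OWNER_UNDEFINED
with explicit_sync = false, and with an undefined owner the old filter never
skipped a shared fence, so we synced to everything. With AMDGPU_SYNC_NE_OWNER
we would now skip same-device fences that are also owned by
AMDGPU_FENCE_OWNER_UNDEFINED. A rough sketch of the two filters for this call
site, using hypothetical helpers just for illustration:

    /* old filter: with owner == AMDGPU_FENCE_OWNER_UNDEFINED this is
     * always false, i.e. no shared fence is ever skipped */
    static bool old_skip(bool same_dev, void *owner, void *fence_owner,
                         bool explicit_sync)
    {
        return same_dev && owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
               (fence_owner == owner || explicit_sync);
    }

    /* AMDGPU_SYNC_NE_OWNER: same-device fences whose owner is also
     * AMDGPU_FENCE_OWNER_UNDEFINED would now be skipped */
    static bool ne_owner_skip(bool same_dev, void *owner, void *fence_owner)
    {
        return same_dev && fence_owner == owner;
    }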
Regards,
Felix
> + AMDGPU_FENCE_OWNER_UNDEFINED);
> if (r)
> goto err_free;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index 4bbd8ff778ea..3b61317c0f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -80,7 +80,7 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
> return 0;
>
> return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv,
> - owner, false);
> + AMDGPU_SYNC_NE_OWNER, owner);
> }
>
> /**