[PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill.

Wed Oct 11 07:33:57 UTC 2017

Am 10.10.2017 um 22:50 schrieb Andrey Grodzovsky:
> This enables old fence waiting before reservation lock is aquired
> which in turn is part of a bigger solution to deadlock happening
> when gpu reset with VRAM recovery accures during intensive rendering.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>

That looks like it should work, just a few style nit picks below.

With those fixed the patch is Reviewed-by: Christian König 
<christian.koenig at amd.com>.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 111 +++++++++++++++++++--------------
>   1 file changed, 64 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index fe7dd44..1a54e53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -739,6 +739,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>   
>   	if (parser->ctx)
>   		amdgpu_ctx_put(parser->ctx);
> +

Unrelated whitespace change, please drop from the patch.

(BTW: Do you know how to efficiently modify patches with "git add -p" 
and "git commit --amend"?).

>   	if (parser->bo_list)
>   		amdgpu_bo_list_put(parser->bo_list);
>   
> @@ -845,7 +846,56 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
>   	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>   	struct amdgpu_vm *vm = &fpriv->vm;
>   	struct amdgpu_ring *ring = p->job->ring;
> -	int i, r;
> +	int i, j, r;
> +
> +	for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
> +
> +		struct amdgpu_cs_chunk *chunk;
> +		struct amdgpu_ib *ib;
> +		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
> +
> +		chunk = &p->chunks[i];
> +		ib = &p->job->ibs[j];
> +		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> +
> +		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> +				       continue;

The indentation of the continue looks wrong in the mail client.

> +
> +		if (p->job->ring->funcs->parse_cs) {
> +			struct amdgpu_bo_va_mapping *m;
> +			struct amdgpu_bo *aobj = NULL;
> +			uint64_t offset;
> +			uint8_t *kptr;
> +
> +			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
> +					&aobj, &m);
> +			if (r) {
> +				DRM_ERROR("IB va_start is invalid\n");
> +				return r;
> +			}
> +
> +			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
> +				(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
> +				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
> +				return -EINVAL;
> +			}
> +
> +			/* the IB should be reserved at this point */
> +			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
> +			if (r) {
> +				return r;
> +			}
> +
> +			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
> +			kptr += chunk_ib->va_start - offset;
> +
> +			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
> +			amdgpu_bo_kunmap(aobj);
> +		}
> +
> +		j++;
> +	}
> +
>   
>   	/* Only for UVD/VCE VM emulation */
>   	if (ring->funcs->parse_cs) {

The loop only does something if (p->job->ring->funcs->parse_cs), so we 
should be able to move it under the following if (ring->funcs->parse_cs).

> @@ -919,54 +969,20 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   
>   		parser->job->ring = ring;
>   
> -		if (ring->funcs->parse_cs) {
> -			struct amdgpu_bo_va_mapping *m;
> -			struct amdgpu_bo *aobj = NULL;
> -			uint64_t offset;
> -			uint8_t *kptr;
> -
> -			r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
> -						   &aobj, &m);
> -			if (r) {
> -				DRM_ERROR("IB va_start is invalid\n");
> -				return r;
> -			}
> -
> -			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
> -			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
> -				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
> -				return -EINVAL;
> -			}
> -
> -			/* the IB should be reserved at this point */
> -			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
> -			if (r) {
> -				return r;
> -			}
> -
> -			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
> -			kptr += chunk_ib->va_start - offset;
> -
> -			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
> -			if (r) {
> -				DRM_ERROR("Failed to get ib !\n");
> -				return r;
> -			}
> -
> -			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
> -			amdgpu_bo_kunmap(aobj);
> -		} else {
> -			r =  amdgpu_ib_get(adev, vm, 0, ib);
> -			if (r) {
> -				DRM_ERROR("Failed to get ib !\n");
> -				return r;
> -			}
> -
> +		r =  amdgpu_ib_get(
> +				adev,
> +				vm,
> +				ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
> +				ib);

Looks correct to me, but the coding style should be more something like 
this:

r =  amdgpu_ib_get(adev, vm,
            ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
            ib);

BTW: What editor do you use? I tend to forget the coding style all the 
time as well, so I've just use appropriate editor settings.

Thanks for the help,
Christian.

> +		if (r) {
> +			DRM_ERROR("Failed to get ib !\n");
> +			return r;
>   		}
>   
>   		ib->gpu_addr = chunk_ib->va_start;
>   		ib->length_dw = chunk_ib->ib_bytes / 4;
>   		ib->flags = chunk_ib->flags;
> +
>   		j++;
>   	}
>   
> @@ -1212,6 +1228,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   		goto out;
>   	}
>   
> +	r = amdgpu_cs_ib_fill(adev, &parser);
> +	if (r)
> +		goto out;
> +
>   	r = amdgpu_cs_parser_bos(&parser, data);
>   	if (r) {
>   		if (r == -ENOMEM)
> @@ -1222,9 +1242,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	}
>   
>   	reserved_buffers = true;
> -	r = amdgpu_cs_ib_fill(adev, &parser);
> -	if (r)
> -		goto out;
>   
>   	r = amdgpu_cs_dependencies(adev, &parser);
>   	if (r) {