[PATCH] drm/amdgpu: Allow to create BO lists in CS ioctl.
Andrey Grodzovsky
Andrey.Grodzovsky at amd.com
Wed Jul 11 15:37:25 UTC 2018
On 07/11/2018 11:05 AM, Christian König wrote:
> Am 11.07.2018 um 16:51 schrieb Andrey Grodzovsky:
>> This change is to support MESA performace optimization.
>> Modify CS IOCTL to allow its input as command buffer and an array of
>> buffer handles to create a temporay bo list and then destroy it
>> when IOCTL completes.
>> This saves on calling for BO_LIST create and destry IOCTLs in MESA
>> and by this improves performance.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12 ++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 62
>> ++++++++++++++++++-----------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 52
>> +++++++++++++++++++++---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +-
>> include/uapi/drm/amdgpu_drm.h | 1 +
>> 5 files changed, 101 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 8eaba0f..e082eba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -732,6 +732,16 @@ void amdgpu_bo_list_get_list(struct
>> amdgpu_bo_list *list,
>> struct list_head *validated);
>> void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>> void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>> + struct drm_amdgpu_bo_list_entry **info_param);
>> +
>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>> + struct drm_file *filp,
>> + struct drm_amdgpu_bo_list_entry *info,
>> + unsigned num_entries,
>> + int *id);
>> +
>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>> /*
>> * GFX stuff
>> @@ -1048,6 +1058,8 @@ struct amdgpu_cs_parser {
>> unsigned num_post_dep_syncobjs;
>> struct drm_syncobj **post_dep_syncobjs;
>> +
>> + bool destroy_bo_list;
>
> I think you don't need this. IIRC the bo_list structure was reference
> counted.
>
> So all you need to do is to make sure that the temporary bo_list you
> create has a reference count of 1 and so is destroyed when the CS
> IOCTL calls amdgpu_bo_list_put() on it.
>
> That would simplify the patch quite a bit.
amdgpu_bo_list_destroy is essential because it removes the list from idr
struct bo_list_handles, amdgpu_bo_list_put doesn't do it.
Regarding the refcount, it's 2 because it's 1 on list create in
amdgpu_cs_bo_handles_chunk->amdgpu_bo_list_create and then it's incremented
to 2 in amdgpu_cs_parser_bos->amdgpu_bo_list_get. So by calling
amdgpu_bo_list_put and then amdgpu_bo_list_destroy the count properly
drops down to 0.
Andrey
>
> Apart from that looks really good to me, especially that you only need
> a new chunk ID for the UAPI is quite nice.
>
> Thanks,
> Christian.
>
>> };
>> #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set
>> means command submit involves a preamble IB */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> index 92be7f6..9acdacc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> @@ -55,7 +55,7 @@ static void amdgpu_bo_list_release_rcu(struct kref
>> *ref)
>> kfree_rcu(list, rhead);
>> }
>> -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>> struct drm_file *filp,
>> struct drm_amdgpu_bo_list_entry *info,
>> unsigned num_entries,
>> @@ -91,7 +91,7 @@ static int amdgpu_bo_list_create(struct
>> amdgpu_device *adev,
>> return 0;
>> }
>> -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int
>> id)
>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>> {
>> struct amdgpu_bo_list *list;
>> @@ -263,49 +263,64 @@ void amdgpu_bo_list_free(struct
>> amdgpu_bo_list *list)
>> kfree(list);
>> }
>> -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>> - struct drm_file *filp)
>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>> + struct drm_amdgpu_bo_list_entry **info_param)
>> {
>> - const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>> -
>> - struct amdgpu_device *adev = dev->dev_private;
>> - struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> - union drm_amdgpu_bo_list *args = data;
>> - uint32_t handle = args->in.list_handle;
>> - const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
>> -
>> struct drm_amdgpu_bo_list_entry *info;
>> - struct amdgpu_bo_list *list;
>> -
>> int r;
>> + const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
>> + const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>> - info = kvmalloc_array(args->in.bo_number,
>> + info = kvmalloc_array(in->bo_number,
>> sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
>> if (!info)
>> return -ENOMEM;
>> /* copy the handle array from userspace to a kernel buffer */
>> r = -EFAULT;
>> - if (likely(info_size == args->in.bo_info_size)) {
>> - unsigned long bytes = args->in.bo_number *
>> - args->in.bo_info_size;
>> + if (likely(info_size == in->bo_info_size)) {
>> + unsigned long bytes = in->bo_number *
>> + in->bo_info_size;
>> if (copy_from_user(info, uptr, bytes))
>> goto error_free;
>> } else {
>> - unsigned long bytes = min(args->in.bo_info_size, info_size);
>> + unsigned long bytes = min(in->bo_info_size, info_size);
>> unsigned i;
>> - memset(info, 0, args->in.bo_number * info_size);
>> - for (i = 0; i < args->in.bo_number; ++i) {
>> + memset(info, 0, in->bo_number * info_size);
>> + for (i = 0; i < in->bo_number; ++i) {
>> if (copy_from_user(&info[i], uptr, bytes))
>> goto error_free;
>> - uptr += args->in.bo_info_size;
>> + uptr += in->bo_info_size;
>> }
>> }
>> + *info_param = info;
>> + return 0;
>> +
>> +error_free:
>> + kvfree(info);
>> + return r;
>> +}
>> +
>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>> + struct drm_file *filp)
>> +{
>> + struct amdgpu_device *adev = dev->dev_private;
>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> + union drm_amdgpu_bo_list *args = data;
>> + uint32_t handle = args->in.list_handle;
>> + struct drm_amdgpu_bo_list_entry *info = NULL;
>> + struct amdgpu_bo_list *list;
>> + int r;
>> +
>> + r = amdgpu_bo_create_list_entry_array(&args->in, &info);
>> + if (r)
>> + goto error_free;
>> +
>> switch (args->in.operation) {
>> case AMDGPU_BO_LIST_OP_CREATE:
>> r = amdgpu_bo_list_create(adev, filp, info,
>> args->in.bo_number,
>> @@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev,
>> void *data,
>> return 0;
>> error_free:
>> - kvfree(info);
>> + if (info)
>> + kvfree(info);
>> return r;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 9881a1e..1b17f6b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -66,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct
>> amdgpu_cs_parser *p,
>> return 0;
>> }
>> -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void
>> *data)
>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
>> + struct drm_amdgpu_bo_list_in *data,
>> + int *id)
>> +{
>> + int r;
>> + struct drm_amdgpu_bo_list_entry *info = NULL;
>> +
>> + r = amdgpu_bo_create_list_entry_array(data, &info);
>> + if (r)
>> + return r;
>> +
>> + r = amdgpu_bo_list_create(p->adev, p->filp, info,
>> data->bo_number, id);
>> + if (r)
>> + goto error_free;
>> +
>> + kvfree(info);
>> + return 0;
>> +
>> +error_free:
>> + if (info)
>> + kvfree(info);
>> +
>> + return r;
>> +}
>> +
>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union
>> drm_amdgpu_cs *cs)
>> {
>> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>> struct amdgpu_vm *vm = &fpriv->vm;
>> - union drm_amdgpu_cs *cs = data;
>> uint64_t *chunk_array_user;
>> uint64_t *chunk_array;
>> unsigned size, num_ibs = 0;
>> @@ -164,6 +188,20 @@ static int amdgpu_cs_parser_init(struct
>> amdgpu_cs_parser *p, void *data)
>> break;
>> + case AMDGPU_CHUNK_ID_BO_HANDLES:
>> + size = sizeof(struct drm_amdgpu_bo_list_in);
>> + if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
>> + ret = -EINVAL;
>> + goto free_partial_kdata;
>> + }
>> +
>> + ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata,
>> &cs->in.bo_list_handle);
>> + p->destroy_bo_list = true;
>> + if (ret)
>> + goto free_partial_kdata;
>> +
>> + break;
>> +
>> case AMDGPU_CHUNK_ID_DEPENDENCIES:
>> case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>> case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
>> @@ -747,7 +785,7 @@ static int amdgpu_cs_sync_rings(struct
>> amdgpu_cs_parser *p)
>> * used by parsing context.
>> **/
>> static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser,
>> int error,
>> - bool backoff)
>> + bool backoff, int id)
>> {
>> unsigned i;
>> @@ -765,9 +803,13 @@ static void amdgpu_cs_parser_fini(struct
>> amdgpu_cs_parser *parser, int error,
>> mutex_unlock(&parser->ctx->lock);
>> amdgpu_ctx_put(parser->ctx);
>> }
>> - if (parser->bo_list)
>> + if (parser->bo_list) {
>> amdgpu_bo_list_put(parser->bo_list);
>> + if (parser->destroy_bo_list)
>> + amdgpu_bo_list_destroy(parser->filp->driver_priv, id);
>> + }
>> +
>> for (i = 0; i < parser->nchunks; i++)
>> kvfree(parser->chunks[i].kdata);
>> kfree(parser->chunks);
>> @@ -1277,7 +1319,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev,
>> void *data, struct drm_file *filp)
>> r = amdgpu_cs_submit(&parser, cs);
>> out:
>> - amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>> + amdgpu_cs_parser_fini(&parser, r, reserved_buffers,
>> cs->in.bo_list_handle);
>> return r;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 06aede1..529500c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -69,9 +69,10 @@
>> * - 3.24.0 - Add high priority compute support for gfx9
>> * - 3.25.0 - Add support for sensor query info (stable pstate
>> sclk/mclk).
>> * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
>> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
>> */
>> #define KMS_DRIVER_MAJOR 3
>> -#define KMS_DRIVER_MINOR 26
>> +#define KMS_DRIVER_MINOR 27
>> #define KMS_DRIVER_PATCHLEVEL 0
>> int amdgpu_vram_limit = 0;
>> diff --git a/include/uapi/drm/amdgpu_drm.h
>> b/include/uapi/drm/amdgpu_drm.h
>> index 09741ba..94444ee 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>> #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
>> #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
>> #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
>> +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06
>> struct drm_amdgpu_cs_chunk {
>> __u32 chunk_id;
>
More information about the amd-gfx
mailing list