[PATCH] drm/amdgpu: Allow to create BO lists in CS ioctl.

Andrey Grodzovsky Andrey.Grodzovsky at amd.com
Wed Jul 11 15:37:25 UTC 2018



On 07/11/2018 11:05 AM, Christian König wrote:
> Am 11.07.2018 um 16:51 schrieb Andrey Grodzovsky:
>> This change is to support MESA performace optimization.
>> Modify CS IOCTL to allow its input as command buffer and an array of
>> buffer handles to create a temporay bo list and then destroy it
>> when IOCTL completes.
>> This saves on calling for BO_LIST create and destry IOCTLs in MESA
>> and by this improves performance.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 12 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 62 
>> ++++++++++++++++++-----------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 52 
>> +++++++++++++++++++++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>>   include/uapi/drm/amdgpu_drm.h               |  1 +
>>   5 files changed, 101 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 8eaba0f..e082eba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -732,6 +732,16 @@ void amdgpu_bo_list_get_list(struct 
>> amdgpu_bo_list *list,
>>                    struct list_head *validated);
>>   void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>>   void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>> +                      struct drm_amdgpu_bo_list_entry **info_param);
>> +
>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>> +                 struct drm_file *filp,
>> +                 struct drm_amdgpu_bo_list_entry *info,
>> +                 unsigned num_entries,
>> +                 int *id);
>> +
>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>>     /*
>>    * GFX stuff
>> @@ -1048,6 +1058,8 @@ struct amdgpu_cs_parser {
>>         unsigned num_post_dep_syncobjs;
>>       struct drm_syncobj **post_dep_syncobjs;
>> +
>> +    bool destroy_bo_list;
>
> I think you don't need this. IIRC the bo_list structure was reference 
> counted.
>
> So all you need to do is to make sure that the temporary bo_list you 
> create has a reference count of 1 and so is destroyed when the CS 
> IOCTL calls amdgpu_bo_list_put() on it.
>
> That would simplify the patch quite a bit.

amdgpu_bo_list_destroy is essential because it removes the list from idr 
struct bo_list_handles, amdgpu_bo_list_put doesn't do it.
Regarding the refcount, it's 2 because it's 1 on list create in 
amdgpu_cs_bo_handles_chunk->amdgpu_bo_list_create and then it's incremented
to 2 in amdgpu_cs_parser_bos->amdgpu_bo_list_get. So  by calling 
amdgpu_bo_list_put and then amdgpu_bo_list_destroy the count properly
drops down to 0.

Andrey

>
> Apart from that looks really good to me, especially that you only need 
> a new chunk ID for the UAPI is quite nice.
>
> Thanks,
> Christian.
>
>>   };
>>     #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set 
>> means command submit involves a preamble IB */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> index 92be7f6..9acdacc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> @@ -55,7 +55,7 @@ static void amdgpu_bo_list_release_rcu(struct kref 
>> *ref)
>>       kfree_rcu(list, rhead);
>>   }
>>   -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>                    struct drm_file *filp,
>>                    struct drm_amdgpu_bo_list_entry *info,
>>                    unsigned num_entries,
>> @@ -91,7 +91,7 @@ static int amdgpu_bo_list_create(struct 
>> amdgpu_device *adev,
>>       return 0;
>>   }
>>   -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int 
>> id)
>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>>   {
>>       struct amdgpu_bo_list *list;
>>   @@ -263,49 +263,64 @@ void amdgpu_bo_list_free(struct 
>> amdgpu_bo_list *list)
>>       kfree(list);
>>   }
>>   -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>> -                struct drm_file *filp)
>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>> +                      struct drm_amdgpu_bo_list_entry **info_param)
>>   {
>> -    const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>> -
>> -    struct amdgpu_device *adev = dev->dev_private;
>> -    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> -    union drm_amdgpu_bo_list *args = data;
>> -    uint32_t handle = args->in.list_handle;
>> -    const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
>> -
>>       struct drm_amdgpu_bo_list_entry *info;
>> -    struct amdgpu_bo_list *list;
>> -
>>       int r;
>> +    const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
>> +    const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>>   -    info = kvmalloc_array(args->in.bo_number,
>> +    info = kvmalloc_array(in->bo_number,
>>                    sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
>>       if (!info)
>>           return -ENOMEM;
>>         /* copy the handle array from userspace to a kernel buffer */
>>       r = -EFAULT;
>> -    if (likely(info_size == args->in.bo_info_size)) {
>> -        unsigned long bytes = args->in.bo_number *
>> -            args->in.bo_info_size;
>> +    if (likely(info_size == in->bo_info_size)) {
>> +        unsigned long bytes = in->bo_number *
>> +            in->bo_info_size;
>>             if (copy_from_user(info, uptr, bytes))
>>               goto error_free;
>>         } else {
>> -        unsigned long bytes = min(args->in.bo_info_size, info_size);
>> +        unsigned long bytes = min(in->bo_info_size, info_size);
>>           unsigned i;
>>   -        memset(info, 0, args->in.bo_number * info_size);
>> -        for (i = 0; i < args->in.bo_number; ++i) {
>> +        memset(info, 0, in->bo_number * info_size);
>> +        for (i = 0; i < in->bo_number; ++i) {
>>               if (copy_from_user(&info[i], uptr, bytes))
>>                   goto error_free;
>>   -            uptr += args->in.bo_info_size;
>> +            uptr += in->bo_info_size;
>>           }
>>       }
>>   +    *info_param = info;
>> +    return 0;
>> +
>> +error_free:
>> +    kvfree(info);
>> +    return r;
>> +}
>> +
>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>> +                struct drm_file *filp)
>> +{
>> +    struct amdgpu_device *adev = dev->dev_private;
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    union drm_amdgpu_bo_list *args = data;
>> +    uint32_t handle = args->in.list_handle;
>> +    struct drm_amdgpu_bo_list_entry *info = NULL;
>> +    struct amdgpu_bo_list *list;
>> +    int r;
>> +
>> +    r = amdgpu_bo_create_list_entry_array(&args->in, &info);
>> +    if (r)
>> +        goto error_free;
>> +
>>       switch (args->in.operation) {
>>       case AMDGPU_BO_LIST_OP_CREATE:
>>           r = amdgpu_bo_list_create(adev, filp, info, 
>> args->in.bo_number,
>> @@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, 
>> void *data,
>>       return 0;
>>     error_free:
>> -    kvfree(info);
>> +    if (info)
>> +        kvfree(info);
>>       return r;
>>   }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 9881a1e..1b17f6b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -66,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct 
>> amdgpu_cs_parser *p,
>>       return 0;
>>   }
>>   -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void 
>> *data)
>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
>> +                      struct drm_amdgpu_bo_list_in *data,
>> +                      int *id)
>> +{
>> +    int r;
>> +    struct drm_amdgpu_bo_list_entry *info = NULL;
>> +
>> +    r = amdgpu_bo_create_list_entry_array(data, &info);
>> +    if (r)
>> +        return r;
>> +
>> +    r = amdgpu_bo_list_create(p->adev, p->filp, info, 
>> data->bo_number, id);
>> +    if (r)
>> +        goto error_free;
>> +
>> +    kvfree(info);
>> +    return 0;
>> +
>> +error_free:
>> +    if (info)
>> +        kvfree(info);
>> +
>> +    return r;
>> +}
>> +
>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
>> drm_amdgpu_cs *cs)
>>   {
>>       struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>       struct amdgpu_vm *vm = &fpriv->vm;
>> -    union drm_amdgpu_cs *cs = data;
>>       uint64_t *chunk_array_user;
>>       uint64_t *chunk_array;
>>       unsigned size, num_ibs = 0;
>> @@ -164,6 +188,20 @@ static int amdgpu_cs_parser_init(struct 
>> amdgpu_cs_parser *p, void *data)
>>                 break;
>>   +        case AMDGPU_CHUNK_ID_BO_HANDLES:
>> +            size = sizeof(struct drm_amdgpu_bo_list_in);
>> +            if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
>> +                ret = -EINVAL;
>> +                goto free_partial_kdata;
>> +            }
>> +
>> +            ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata, 
>> &cs->in.bo_list_handle);
>> +            p->destroy_bo_list = true;
>> +            if (ret)
>> +                goto free_partial_kdata;
>> +
>> +            break;
>> +
>>           case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>           case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>>           case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
>> @@ -747,7 +785,7 @@ static int amdgpu_cs_sync_rings(struct 
>> amdgpu_cs_parser *p)
>>    * used by parsing context.
>>    **/
>>   static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, 
>> int error,
>> -                  bool backoff)
>> +                  bool backoff, int id)
>>   {
>>       unsigned i;
>>   @@ -765,9 +803,13 @@ static void amdgpu_cs_parser_fini(struct 
>> amdgpu_cs_parser *parser, int error,
>>           mutex_unlock(&parser->ctx->lock);
>>           amdgpu_ctx_put(parser->ctx);
>>       }
>> -    if (parser->bo_list)
>> +    if (parser->bo_list) {
>>           amdgpu_bo_list_put(parser->bo_list);
>>   +        if (parser->destroy_bo_list)
>> + amdgpu_bo_list_destroy(parser->filp->driver_priv, id);
>> +    }
>> +
>>       for (i = 0; i < parser->nchunks; i++)
>>           kvfree(parser->chunks[i].kdata);
>>       kfree(parser->chunks);
>> @@ -1277,7 +1319,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file *filp)
>>       r = amdgpu_cs_submit(&parser, cs);
>>     out:
>> -    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>> +    amdgpu_cs_parser_fini(&parser, r, reserved_buffers, 
>> cs->in.bo_list_handle);
>>       return r;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 06aede1..529500c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -69,9 +69,10 @@
>>    * - 3.24.0 - Add high priority compute support for gfx9
>>    * - 3.25.0 - Add support for sensor query info (stable pstate 
>> sclk/mclk).
>>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
>> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
>>    */
>>   #define KMS_DRIVER_MAJOR    3
>> -#define KMS_DRIVER_MINOR    26
>> +#define KMS_DRIVER_MINOR    27
>>   #define KMS_DRIVER_PATCHLEVEL    0
>>     int amdgpu_vram_limit = 0;
>> diff --git a/include/uapi/drm/amdgpu_drm.h 
>> b/include/uapi/drm/amdgpu_drm.h
>> index 09741ba..94444ee 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>>   #define AMDGPU_CHUNK_ID_DEPENDENCIES    0x03
>>   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>>   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
>> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>>     struct drm_amdgpu_cs_chunk {
>>       __u32        chunk_id;
>



More information about the amd-gfx mailing list