[PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
zhoucm1
zhoucm1 at amd.com
Thu Jul 12 04:21:11 UTC 2018
Thinking about this some more for performance reasons: can we go further
and not create a temporary bo list at all, and instead add the BOs
directly to the parser->validated list?

If the bo array is very long, the overhead of bo list creation inside CS
is considerable; it iterates over all BOs twice compared to the original
path.

From the UMD perspective, they don't create a bo list for every CS; they
can reuse a previously created bo_list for the next several CS
submissions, and if there is a new bo they just add it to that list.
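
Something along these lines is what I have in mind - a rough, untested
sketch only: amdgpu_cs_add_bo_handles() and the per-parser entry array it
fills are made-up names for illustration, and GEM reference dropping plus
error unwinding are left out:

/*
 * Sketch: resolve the user handles straight into p->validated and skip
 * amdgpu_bo_list_create() entirely.  Assumes the drm_amdgpu_bo_list_entry
 * array was already copied in (e.g. via amdgpu_bo_create_list_entry_array()).
 */
static int amdgpu_cs_add_bo_handles(struct amdgpu_cs_parser *p,
                                    struct drm_amdgpu_bo_list_entry *info,
                                    unsigned int num_entries)
{
        unsigned int i;

        for (i = 0; i < num_entries; ++i) {
                struct drm_gem_object *gobj;
                struct amdgpu_bo_list_entry *e;

                gobj = drm_gem_object_lookup(p->filp, info[i].bo_handle);
                if (!gobj)
                        return -ENOENT;

                e = &p->bo_handle_entries[i];   /* hypothetical per-parser array */
                e->robj = gem_to_amdgpu_bo(gobj);
                e->priority = min_t(u32, info[i].bo_priority,
                                    AMDGPU_BO_LIST_MAX_PRIORITY);
                e->tv.bo = &e->robj->tbo;
                e->tv.shared = true;    /* real code would check prime_shared_count */
                list_add_tail(&e->tv.head, &p->validated);
        }

        return 0;
}

That way a CS carrying the new chunk never has to touch the bo_list code
at all.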
Thanks,
David Zhou
On 2018-07-12 12:02, zhoucm1 wrote:
>
>
> On 2018-07-12 11:09, Zhou, David(ChunMing) wrote:
>> Hi Andrey,
>>
>> Could you add a compatibility flag or bump the KMS driver version, so
>> that user space can keep using the old path alongside the new one?
> Sorry for the noise, it's already at the end of the patch.
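>
> With that bump userspace can simply gate the new chunk on the reported
> KMS minor before taking the new path - a rough, untested sketch using the
> stock libdrm drmGetVersion() (cs_bo_handles_supported() is just an
> illustrative name, and 27 matches the bump at the end of this patch):
>
> #include <stdbool.h>
> #include <xf86drm.h>
>
> static bool cs_bo_handles_supported(int fd)
> {
>         drmVersionPtr ver = drmGetVersion(fd);
>         bool ok;
>
>         if (!ver)
>                 return false;
>
>         /* AMDGPU_CHUNK_ID_BO_HANDLES is advertised from 3.27 on */
>         ok = ver->version_major > 3 ||
>              (ver->version_major == 3 && ver->version_minor >= 27);
>         drmFreeVersion(ver);
>         return ok;
> }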
>
> Regards,
> David Zhou
>>
>> Regards,
>> David Zhou
>>
>> -----Original Message-----
>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On
>> Behalf Of zhoucm1
>> Sent: Thursday, July 12, 2018 10:31 AM
>> To: Grodzovsky, Andrey <Andrey.Grodzovsky at amd.com>;
>> amd-gfx at lists.freedesktop.org
>> Cc: Olsak, Marek <Marek.Olsak at amd.com>; Koenig, Christian
>> <Christian.Koenig at amd.com>
>> Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS
>> ioctl v2
>>
>>
>>
>> On 2018-07-12 04:57, Andrey Grodzovsky wrote:
>>> This change supports a MESA performance optimization.
>>> Modify the CS IOCTL so that, along with the command buffer, it accepts
>>> an array of buffer handles from which a temporary bo list is created
>>> and then destroyed when the IOCTL completes.
>>> This saves the BO_LIST create and destroy IOCTL calls in MESA and
>>> thereby improves performance.
>>>
>>> v2: Avoid inserting the temp list into idr struct.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 ++++++++++++++++++-----------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 51 +++++++++++++++--
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +-
>>> include/uapi/drm/amdgpu_drm.h | 1 +
>>> 5 files changed, 114 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 8eaba0f..9b472b2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>>> struct list_head *validated);
>>> void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>>> void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
>>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>>> + struct drm_amdgpu_bo_list_entry **info_param);
>>> +
>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>> + struct drm_file *filp,
>>> + struct drm_amdgpu_bo_list_entry *info,
>>> + unsigned num_entries,
>>> + int *id,
>>> + struct amdgpu_bo_list **list);
>>> +
>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>>> /*
>>> * GFX stuff
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> index 92be7f6..14c7c59 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
>>> kfree_rcu(list, rhead);
>>> }
>>> -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>> struct drm_file *filp,
>>> struct drm_amdgpu_bo_list_entry *info,
>>> unsigned num_entries,
>>> - int *id)
>>> + int *id,
>>> + struct amdgpu_bo_list **list_out)
>>> {
>>> int r;
>>> struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> @@ -78,20 +79,25 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>> return r;
>>> }
>>> + if (id) {
>>> /* idr alloc should be called only after initialization of bo list. */
>>> - mutex_lock(&fpriv->bo_list_lock);
>>> - r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
>>> - mutex_unlock(&fpriv->bo_list_lock);
>>> - if (r < 0) {
>>> - amdgpu_bo_list_free(list);
>>> - return r;
>>> + mutex_lock(&fpriv->bo_list_lock);
>>> + r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
>>> + mutex_unlock(&fpriv->bo_list_lock);
>>> + if (r < 0) {
>>> + amdgpu_bo_list_free(list);
>>> + return r;
>>> + }
>>> + *id = r;
>>> }
>>> - *id = r;
>>> +
>>> + if (list_out)
>>> + *list_out = list;
>>> return 0;
>>> }
>>> -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>>> {
>>> struct amdgpu_bo_list *list;
>>> @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
>>> kfree(list);
>>> }
>>> -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>> - struct drm_file *filp)
>>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>>> + struct drm_amdgpu_bo_list_entry **info_param)
>>> {
>>> - const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>>> -
>>> - struct amdgpu_device *adev = dev->dev_private;
>>> - struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> - union drm_amdgpu_bo_list *args = data;
>>> - uint32_t handle = args->in.list_handle;
>>> - const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
>>> -
>>> struct drm_amdgpu_bo_list_entry *info;
>>> - struct amdgpu_bo_list *list;
>>> -
>>> int r;
>>> + const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
>>> + const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>>> - info = kvmalloc_array(args->in.bo_number,
>>> + info = kvmalloc_array(in->bo_number,
>>> sizeof(struct drm_amdgpu_bo_list_entry),
>>> GFP_KERNEL);
>>> if (!info)
>>> return -ENOMEM;
>>> /* copy the handle array from userspace to a kernel buffer */
>>> r = -EFAULT;
>>> - if (likely(info_size == args->in.bo_info_size)) {
>>> - unsigned long bytes = args->in.bo_number *
>>> - args->in.bo_info_size;
>>> + if (likely(info_size == in->bo_info_size)) {
>>> + unsigned long bytes = in->bo_number *
>>> + in->bo_info_size;
>>> if (copy_from_user(info, uptr, bytes))
>>> goto error_free;
>>> } else {
>>> - unsigned long bytes = min(args->in.bo_info_size, info_size);
>>> + unsigned long bytes = min(in->bo_info_size, info_size);
>>> unsigned i;
>>> - memset(info, 0, args->in.bo_number * info_size);
>>> - for (i = 0; i < args->in.bo_number; ++i) {
>>> + memset(info, 0, in->bo_number * info_size);
>>> + for (i = 0; i < in->bo_number; ++i) {
>>> if (copy_from_user(&info[i], uptr, bytes))
>>> goto error_free;
>>> - uptr += args->in.bo_info_size;
>>> + uptr += in->bo_info_size;
>>> }
>>> }
>>> + *info_param = info;
>>> + return 0;
>>> +
>>> +error_free:
>>> + kvfree(info);
>>> + return r;
>>> +}
>>> +
>>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>> + struct drm_file *filp)
>>> +{
>>> + struct amdgpu_device *adev = dev->dev_private;
>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> + union drm_amdgpu_bo_list *args = data;
>>> + uint32_t handle = args->in.list_handle;
>>> + struct drm_amdgpu_bo_list_entry *info = NULL;
>>> + struct amdgpu_bo_list *list;
>>> + int r;
>>> +
>>> + r = amdgpu_bo_create_list_entry_array(&args->in, &info);
>>> + if (r)
>>> + goto error_free;
>>> +
>>> switch (args->in.operation) {
>>> case AMDGPU_BO_LIST_OP_CREATE:
>>> r = amdgpu_bo_list_create(adev, filp, info,
>>> args->in.bo_number,
>>> - &handle);
>>> + &handle, NULL);
>>> if (r)
>>> goto error_free;
>>> break;
>>> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>> return 0;
>>> error_free:
>>> - kvfree(info);
>>> + if (info)
>>> + kvfree(info);
>>> return r;
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 9881a1e..30026b8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>>> return 0;
>>> }
>>> -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
>>> + struct drm_amdgpu_bo_list_in *data) {
>>> + int r;
>>> + struct drm_amdgpu_bo_list_entry *info = NULL;
>>> +
>>> + r = amdgpu_bo_create_list_entry_array(data, &info);
>>> + if (r)
>>> + return r;
>>> +
>>> + r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, NULL, &p->bo_list);
>>> + if (r)
>>> + goto error_free;
>>> +
>>> + kvfree(info);
>>> + return 0;
>>> +
>>> +error_free:
>>> + if (info)
>>> + kvfree(info);
>>> +
>>> + return r;
>>> +}
>>> +
>>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union
>>> +drm_amdgpu_cs *cs)
>>> {
>>> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>> struct amdgpu_vm *vm = &fpriv->vm;
>>> - union drm_amdgpu_cs *cs = data;
>>> uint64_t *chunk_array_user;
>>> uint64_t *chunk_array;
>>> unsigned size, num_ibs = 0;
>>> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct
>>> amdgpu_cs_parser *p, void *data)
>>> break;
>>> + case AMDGPU_CHUNK_ID_BO_HANDLES:
>>> + size = sizeof(struct drm_amdgpu_bo_list_in);
>>> + if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
>>> + ret = -EINVAL;
>>> + goto free_partial_kdata;
>>> + }
>>> +
>>> + ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
>>> + if (ret)
>>> + goto free_partial_kdata;
>>> +
>>> + break;
>>> +
>>> case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>> case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>>> case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
>>> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>> INIT_LIST_HEAD(&p->validated);
>>> - p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
>>> + /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
>>> + if (!p->bo_list)
>>> + p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
>>> + else
>>> + mutex_lock(&p->bo_list->lock);
>>> +
>>> if (p->bo_list) {
>>> amdgpu_bo_list_get_list(p->bo_list, &p->validated);
>>> if (p->bo_list->first_userptr != p->bo_list->num_entries)
>>> @@ -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>>> * used by parsing context.
>>> **/
>>> static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>>> - bool backoff)
>>> + bool backoff, int id)
>> You don't need this extra parameter once you get the bo_list without going through the idr.
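>>
>> (For the temporary list that should just fall out of the existing put in
>> the fini path, roughly:
>>
>>         if (parser->bo_list)
>>                 amdgpu_bo_list_put(parser->bo_list);
>>
>> the temp list holds the only reference, so the put releases it and no
>> extra id/handle needs to be carried around.)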
>>
>>> {
>>> unsigned i;
>>> @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>> r = amdgpu_cs_submit(&parser, cs);
>>> out:
>>> - amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>>> + amdgpu_cs_parser_fini(&parser, r, reserved_buffers,
>>> +cs->in.bo_list_handle);
>> Same here - not needed once the bo_list is obtained without the idr.
>>
>> Otherwise it looks really good to me.
>>
>> Reviewed-by: Chunming Zhou <david1.zhou at amd.com>
>>
>>> return r;
>>> }
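>>
>> For reference, with this in place the UMD side only needs to attach the
>> new chunk to the CS submission instead of calling DRM_AMDGPU_BO_LIST
>> first. A rough, untested raw-ioctl sketch (submit_with_bo_handles() is an
>> illustrative name, not how Mesa's winsys is actually wired up; the IB
>> chunk and error handling are omitted):
>>
>> #include <stdint.h>
>> #include <string.h>
>> #include <xf86drm.h>
>> #include <libdrm/amdgpu_drm.h>
>>
>> static int submit_with_bo_handles(int fd, uint32_t ctx_id,
>>                                   struct drm_amdgpu_bo_list_entry *bos,
>>                                   uint32_t num_bos)
>> {
>>         struct drm_amdgpu_bo_list_in bo_list_in = {
>>                 .bo_number = num_bos,
>>                 .bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry),
>>                 .bo_info_ptr = (uintptr_t)bos,
>>         };
>>         struct drm_amdgpu_cs_chunk chunk = {
>>                 .chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES,
>>                 .length_dw = sizeof(bo_list_in) / 4,
>>                 .chunk_data = (uintptr_t)&bo_list_in,
>>         };
>>         uint64_t chunk_ptr = (uintptr_t)&chunk;
>>         union drm_amdgpu_cs cs;
>>
>>         memset(&cs, 0, sizeof(cs));
>>         cs.in.ctx_id = ctx_id;
>>         cs.in.num_chunks = 1;   /* a real submission also passes IB chunk(s) */
>>         cs.in.chunks = (uintptr_t)&chunk_ptr;
>>
>>         return drmCommandWriteRead(fd, DRM_AMDGPU_CS, &cs, sizeof(cs));
>> }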
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index 06aede1..529500c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -69,9 +69,10 @@
>>> * - 3.24.0 - Add high priority compute support for gfx9
>>> * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
>>> * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
>>> + * - 3.27.0 - Add a new chunk to AMDGPU_CS to enable BO_LIST creation.
>>> */
>>> #define KMS_DRIVER_MAJOR 3
>>> -#define KMS_DRIVER_MINOR 26
>>> +#define KMS_DRIVER_MINOR 27
>>> #define KMS_DRIVER_PATCHLEVEL 0
>>> int amdgpu_vram_limit = 0;
>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>> index 09741ba..94444ee 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>>> #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
>>> #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
>>> #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
>>> +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06
>>> struct drm_amdgpu_cs_chunk {
>>> __u32 chunk_id;