[Mesa-stable] [Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining
Juan A. Suarez Romero
jasuarez at igalia.com
Tue Apr 24 07:53:51 UTC 2018
On Sat, 2018-04-21 at 14:38 +0200, Samuel Pitoiset wrote:
> Hi Mark,
>
> Sure, I will do next week.
>
Hello, Samuel.
Would it be possible to get this patch between today and tomorrow? I'd like to
create the candidate next 18.0 release tomorrow, and the current patch doesn't
apply in the branch, as pointed out by Mark.
If you can't provide it, we can skip this patch for this release and add to the
next one.
Thanks.
J.A.
> Thanks.
>
> On 04/21/2018 02:22 AM, Mark Janes wrote:
> > Hi Samuel,
> >
> > This patch doesn't apply cleanly to the stable branches. You may want
> > to send a backport to mesa-stable.
> >
> > -Mark
> >
> > Samuel Pitoiset <samuel.pitoiset at gmail.com> writes:
> >
> > > The SI family doesn't support chaining which means the maximum
> > > size in dwords per CS is limited. When that limit was reached
> > > we failed to submit the CS and the application crashed.
> > >
> > > This patch allows to submit up to 4 IBs which is currently the
> > > limit, but recent amdgpu supports more than that.
> > >
> > > Please note that we can reach the limit of 4 IBs per submit
> > > but currently we can't improve that. The only solution is to
> > > upgrade libdrm. That will be improved later but for now this
> > > should fix crashes on SI or when using RADV_DEBUG=noibs.
> > >
> > > Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.")
> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775
> > > Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> > > ---
> > > src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++++++++++++++----
> > > 1 file changed, 168 insertions(+), 50 deletions(-)
> > >
> > > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> > > index c4b2232ce9e..17e6d8ba2b6 100644
> > > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> > > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> > > @@ -68,6 +68,10 @@ struct radv_amdgpu_cs {
> > > struct radeon_winsys_bo **virtual_buffers;
> > > uint8_t *virtual_buffer_priorities;
> > > int *virtual_buffer_hash_table;
> > > +
> > > + /* For chips that don't support chaining. */
> > > + struct radeon_winsys_cs *old_cs_buffers;
> > > + unsigned num_old_cs_buffers;
> > > };
> > >
> > > static inline struct radv_amdgpu_cs *
> > > @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
> > > for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
> > > cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
> > >
> > > + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
> > > + struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
> > > + free(rcs->buf);
> > > + }
> > > +
> > > + free(cs->old_cs_buffers);
> > > free(cs->old_ib_buffers);
> > > free(cs->virtual_buffers);
> > > free(cs->virtual_buffer_priorities);
> > > @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
> > > /* The total ib size cannot exceed limit_dws dwords. */
> > > if (ib_dws > limit_dws)
> > > {
> > > - cs->failed = true;
> > > + /* The maximum size in dwords has been reached,
> > > + * try to allocate a new one.
> > > + */
> > > + if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
> > > + /* TODO: Allow to submit more than 4 IBs. */
> > > + fprintf(stderr, "amdgpu: Maximum number of IBs "
> > > + "per submit reached.\n");
> > > + cs->failed = true;
> > > + cs->base.cdw = 0;
> > > + return;
> > > + }
> > > +
> > > + cs->old_cs_buffers =
> > > + realloc(cs->old_cs_buffers,
> > > + (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
> > > + if (!cs->old_cs_buffers) {
> > > + cs->failed = true;
> > > + cs->base.cdw = 0;
> > > + return;
> > > + }
> > > +
> > > + /* Store the current one for submitting it later. */
> > > + cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
> > > + cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
> > > + cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
> > > + cs->num_old_cs_buffers++;
> > > +
> > > + /* Reset the cs, it will be re-allocated below. */
> > > cs->base.cdw = 0;
> > > - return;
> > > + cs->base.buf = NULL;
> > > +
> > > + /* Re-compute the number of dwords to allocate. */
> > > + ib_dws = MAX2(cs->base.cdw + min_size,
> > > + MIN2(cs->base.max_dw * 2, limit_dws));
> > > + if (ib_dws > limit_dws) {
> > > + fprintf(stderr, "amdgpu: Too high number of "
> > > + "dwords to allocate\n");
> > > + cs->failed = true;
> > > + return;
> > > + }
> > > }
> > >
> > > uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
> > > @@ -400,6 +447,15 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
> > > cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
> > > cs->ib_size_ptr = &cs->ib.size;
> > > cs->ib.size = 0;
> > > + } else {
> > > + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
> > > + struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
> > > + free(rcs->buf);
> > > + }
> > > +
> > > + free(cs->old_cs_buffers);
> > > + cs->old_cs_buffers = NULL;
> > > + cs->num_old_cs_buffers = 0;
> > > }
> > > }
> > >
> > > @@ -550,7 +606,8 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
> > > static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> > > struct radeon_winsys_cs **cs_array,
> > > unsigned count,
> > > - struct radv_amdgpu_winsys_bo *extra_bo,
> > > + struct radv_amdgpu_winsys_bo **extra_bo_array,
> > > + unsigned num_extra_bo,
> > > struct radeon_winsys_cs *extra_cs,
> > > const struct radv_winsys_bo_list *radv_bo_list,
> > > amdgpu_bo_list_handle *bo_list)
> > > @@ -580,7 +637,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> > > bo_list);
> > > free(handles);
> > > pthread_mutex_unlock(&ws->global_bo_list_lock);
> > > - } else if (count == 1 && !extra_bo && !extra_cs && !radv_bo_list &&
> > > + } else if (count == 1 && !num_extra_bo && !extra_cs && !radv_bo_list &&
> > > !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
> > > struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
> > > if (cs->num_buffers == 0) {
> > > @@ -590,8 +647,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> > > r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
> > > cs->priorities, bo_list);
> > > } else {
> > > - unsigned total_buffer_count = !!extra_bo;
> > > - unsigned unique_bo_count = !!extra_bo;
> > > + unsigned total_buffer_count = num_extra_bo;
> > > + unsigned unique_bo_count = num_extra_bo;
> > > for (unsigned i = 0; i < count; ++i) {
> > > struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
> > > total_buffer_count += cs->num_buffers;
> > > @@ -619,9 +676,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> > > return -ENOMEM;
> > > }
> > >
> > > - if (extra_bo) {
> > > - handles[0] = extra_bo->bo;
> > > - priorities[0] = 8;
> > > + for (unsigned i = 0; i < num_extra_bo; i++) {
> > > + handles[i] = extra_bo_array[i]->bo;
> > > + priorities[i] = 8;
> > > }
> > >
> > > for (unsigned i = 0; i < count + !!extra_cs; ++i) {
> > > @@ -773,7 +830,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
> > > }
> > > }
> > >
> > > - r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs,
> > > + r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, initial_preamble_cs,
> > > radv_bo_list, &bo_list);
> > > if (r) {
> > > fprintf(stderr, "amdgpu: buffer list creation failed for the "
> > > @@ -842,7 +899,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
> > >
> > > memset(&request, 0, sizeof(request));
> > >
> > > - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
> > > + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
> > > preamble_cs, radv_bo_list, &bo_list);
> > > if (r) {
> > > fprintf(stderr, "amdgpu: buffer list creation failed "
> > > @@ -923,68 +980,127 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
> > > assert(cs_count);
> > >
> > > for (unsigned i = 0; i < cs_count;) {
> > > - struct amdgpu_cs_ib_info ib = {0};
> > > - struct radeon_winsys_bo *bo = NULL;
> > > + struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
> > > + unsigned number_of_ibs = 1;
> > > + struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
> > > struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
> > > + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
> > > uint32_t *ptr;
> > > unsigned cnt = 0;
> > > unsigned size = 0;
> > > unsigned pad_words = 0;
> > > - if (preamble_cs)
> > > - size += preamble_cs->cdw;
> > >
> > > - while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
> > > - size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
> > > - ++cnt;
> > > - }
> > > + if (cs->num_old_cs_buffers > 0) {
> > > + /* Special path when the maximum size in dwords has
> > > + * been reached because we need to handle more than one
> > > + * IB per submit.
> > > + */
> > > + unsigned new_cs_count = cs->num_old_cs_buffers + 1;
> > > + struct radeon_winsys_cs *new_cs_array[4];
> > > + unsigned idx = 0;
> > > +
> > > + for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
> > > + new_cs_array[idx++] = &cs->old_cs_buffers[j];
> > > + new_cs_array[idx++] = cs_array[i];
> > > +
> > > + for (unsigned j = 0; j < new_cs_count; j++) {
> > > + struct radeon_winsys_cs *rcs = new_cs_array[j];
> > > + bool needs_preamble = preamble_cs && j == 0;
> > > + unsigned size = 0;
> > > +
> > > + if (needs_preamble)
> > > + size += preamble_cs->cdw;
> > > + size += rcs->cdw;
> > > +
> > > + assert(size < 0xffff8);
> > > +
> > > + while(!size || (size & 7)) {
> > > + size++;
> > > + pad_words++;
> > > + }
> > >
> > > - while(!size || (size & 7)) {
> > > - size++;
> > > - pad_words++;
> > > - }
> > > - assert(cnt);
> > > + bos[j] = ws->buffer_create(ws, 4 * size, 4096,
> > > + RADEON_DOMAIN_GTT,
> > > + RADEON_FLAG_CPU_ACCESS |
> > > + RADEON_FLAG_NO_INTERPROCESS_SHARING |
> > > + RADEON_FLAG_READ_ONLY);
> > > + ptr = ws->buffer_map(bos[j]);
> > >
> > > - bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT,
> > > - RADEON_FLAG_CPU_ACCESS |
> > > - RADEON_FLAG_NO_INTERPROCESS_SHARING |
> > > - RADEON_FLAG_READ_ONLY);
> > > - ptr = ws->buffer_map(bo);
> > > + if (needs_preamble) {
> > > + memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
> > > + ptr += preamble_cs->cdw;
> > > + }
> > >
> > > - if (preamble_cs) {
> > > - memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
> > > - ptr += preamble_cs->cdw;
> > > - }
> > > + memcpy(ptr, rcs->buf, 4 * rcs->cdw);
> > > + ptr += rcs->cdw;
> > >
> > > - for (unsigned j = 0; j < cnt; ++j) {
> > > - struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
> > > - memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
> > > - ptr += cs->base.cdw;
> > > + for (unsigned k = 0; k < pad_words; ++k)
> > > + *ptr++ = pad_word;
> > >
> > > - }
> > > + ibs[j].size = size;
> > > + ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]);
> > > + }
> > >
> > > - for (unsigned j = 0; j < pad_words; ++j)
> > > - *ptr++ = pad_word;
> > > + number_of_ibs = new_cs_count;
> > > + cnt++;
> > > + } else {
> > > + if (preamble_cs)
> > > + size += preamble_cs->cdw;
> > >
> > > - memset(&request, 0, sizeof(request));
> > > + while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
> > > + size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
> > > + ++cnt;
> > > + }
> > > +
> > > + while (!size || (size & 7)) {
> > > + size++;
> > > + pad_words++;
> > > + }
> > > + assert(cnt);
> > >
> > > + bos[0] = ws->buffer_create(ws, 4 * size, 4096,
> > > + RADEON_DOMAIN_GTT,
> > > + RADEON_FLAG_CPU_ACCESS |
> > > + RADEON_FLAG_NO_INTERPROCESS_SHARING |
> > > + RADEON_FLAG_READ_ONLY);
> > > + ptr = ws->buffer_map(bos[0]);
> > > +
> > > + if (preamble_cs) {
> > > + memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
> > > + ptr += preamble_cs->cdw;
> > > + }
> > > +
> > > + for (unsigned j = 0; j < cnt; ++j) {
> > > + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
> > > + memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
> > > + ptr += cs->base.cdw;
> > > +
> > > + }
> > > +
> > > + for (unsigned j = 0; j < pad_words; ++j)
> > > + *ptr++ = pad_word;
> > > +
> > > + ibs[0].size = size;
> > > + ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]);
> > > + }
> > >
> > > r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
> > > - (struct radv_amdgpu_winsys_bo*)bo,
> > > - preamble_cs, radv_bo_list, &bo_list);
> > > + (struct radv_amdgpu_winsys_bo **)bos,
> > > + number_of_ibs, preamble_cs,
> > > + radv_bo_list, &bo_list);
> > > if (r) {
> > > fprintf(stderr, "amdgpu: buffer list creation failed "
> > > "for the sysmem submission (%d)\n", r);
> > > return r;
> > > }
> > >
> > > - ib.size = size;
> > > - ib.ib_mc_address = radv_buffer_get_va(bo);
> > > + memset(&request, 0, sizeof(request));
> > >
> > > request.ip_type = cs0->hw_ip;
> > > request.ring = queue_idx;
> > > request.resources = bo_list;
> > > - request.number_of_ibs = 1;
> > > - request.ibs = &ib;
> > > + request.number_of_ibs = number_of_ibs;
> > > + request.ibs = ibs;
> > > request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
> > >
> > > sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
> > > @@ -1000,9 +1116,11 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
> > > if (bo_list)
> > > amdgpu_bo_list_destroy(bo_list);
> > >
> > > - ws->buffer_destroy(bo);
> > > - if (r)
> > > - return r;
> > > + for (unsigned j = 0; j < number_of_ibs; j++) {
> > > + ws->buffer_destroy(bos[j]);
> > > + if (r)
> > > + return r;
> > > + }
> > >
> > > i += cnt;
> > > }
> > > --
> > > 2.17.0
> > >
> > > _______________________________________________
> > > mesa-dev mailing list
> > > mesa-dev at lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
> _______________________________________________
> mesa-stable mailing list
> mesa-stable at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
More information about the mesa-stable
mailing list