[Mesa-dev] [PATCH 1/2] radv/winsys: remove the max IBs per submit limit for the fallback path
Bas Nieuwenhuizen
basni at chromium.org
Thu Nov 15 11:14:07 UTC 2018
Nice, this seems to halve the CPU cost of submitting 100 command buffers
in a single submit with the simultaneous-use flag set.
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
for the series.
On Thu, Nov 15, 2018 at 11:27 AM Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
> The chained submission is the fastest path and it should now
> be used more often than before. This removes some EOP events.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 103 ++++++++++--------
> 1 file changed, 55 insertions(+), 48 deletions(-)
>
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> index abc4f3903d..f2d07a54db 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> @@ -865,66 +865,73 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
> struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
> struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
> amdgpu_bo_list_handle bo_list;
> - struct amdgpu_cs_request request;
> - bool emit_signal_sem = sem_info->cs_emit_signal;
> + struct amdgpu_cs_request request = {};
> + struct amdgpu_cs_ib_info *ibs;
> + struct radv_amdgpu_cs *cs0;
> + unsigned number_of_ibs;
> +
> assert(cs_count);
> + cs0 = radv_amdgpu_cs(cs_array[0]);
>
> - for (unsigned i = 0; i < cs_count;) {
> - struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
> - struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
> - struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
> - unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
> - cs_count - i);
> + /* Compute the number of IBs for this submit. */
> + number_of_ibs = cs_count + !!initial_preamble_cs;
>
> - memset(&request, 0, sizeof(request));
> + /* Create a buffer object list. */
> + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0,
> + initial_preamble_cs, radv_bo_list,
> + &bo_list);
> + if (r) {
> + fprintf(stderr, "amdgpu: buffer list creation failed "
> + "for the fallback submission (%d)\n", r);
> + return r;
> + }
>
> - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
> - preamble_cs, radv_bo_list, &bo_list);
> - if (r) {
> - fprintf(stderr, "amdgpu: buffer list creation failed "
> - "for the fallback submission (%d)\n", r);
> - return r;
> - }
> + ibs = malloc(number_of_ibs * sizeof(*ibs));
> + if (!ibs) {
> + if (bo_list)
> + amdgpu_bo_list_destroy(bo_list);
> + return -ENOMEM;
> + }
>
> - request.ip_type = cs0->hw_ip;
> - request.ring = queue_idx;
> - request.resources = bo_list;
> - request.number_of_ibs = cnt + !!preamble_cs;
> - request.ibs = ibs;
> - request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
> + /* Configure the CS request. */
> + if (initial_preamble_cs)
> + ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
>
> - if (preamble_cs) {
> - ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
> - }
> + for (unsigned i = 0; i < cs_count; i++) {
> + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
>
> - for (unsigned j = 0; j < cnt; ++j) {
> - struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
> - ibs[j + !!preamble_cs] = cs->ib;
> + ibs[i + !!initial_preamble_cs] = cs->ib;
>
> - if (cs->is_chained) {
> - *cs->ib_size_ptr -= 4;
> - cs->is_chained = false;
> - }
> + if (cs->is_chained) {
> + *cs->ib_size_ptr -= 4;
> + cs->is_chained = false;
> }
> + }
>
> - sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
> - r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
> - if (r) {
> - if (r == -ENOMEM)
> - fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
> - else
> - fprintf(stderr, "amdgpu: The CS has been rejected, "
> - "see dmesg for more information.\n");
> - }
> + request.ip_type = cs0->hw_ip;
> + request.ring = queue_idx;
> + request.resources = bo_list;
> + request.number_of_ibs = number_of_ibs;
> + request.ibs = ibs;
> + request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
>
> - if (bo_list)
> - amdgpu_bo_list_destroy(bo_list);
> + /* Submit the CS. */
> + r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
> + if (r) {
> + if (r == -ENOMEM)
> + fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
> + else
> + fprintf(stderr, "amdgpu: The CS has been rejected, "
> + "see dmesg for more information.\n");
> + }
>
> - if (r)
> - return r;
> + if (bo_list)
> + amdgpu_bo_list_destroy(bo_list);
> + free(ibs);
> +
> + if (r)
> + return r;
>
> - i += cnt;
> - }
> if (fence)
> radv_amdgpu_request_to_fence(ctx, fence, &request);
>
> @@ -1131,7 +1138,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
> if (!cs->ws->use_ib_bos) {
> ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array,
> cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
> - } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
> + } else if (can_patch && cs->ws->batchchain) {
> ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array,
> cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
> } else {
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list