[Mesa-dev] [RFC 2/3] u_vbuf: add logic to use a limited number of vbufs

Mon Jun 13 09:48:32 UTC 2016

On 11.06.2016 21:21, Christian Gmeiner wrote:
> From: "Wladimir J. van der Laan" <laanwj at gmail.com>
>
> Make it possible to limit the number of vertex buffers as there exist
> GPUs with fewer than 32 supported vertex buffers.
>
> Signed-off-by: Wladimir J. van der Laan <laanwj at gmail.com>
> ---
>   src/gallium/auxiliary/util/u_vbuf.c | 45 +++++++++++++++++++++++++++++++------
>   src/gallium/auxiliary/util/u_vbuf.h |  3 +++
>   2 files changed, 41 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
> index 5b4e527..464c279 100644
> --- a/src/gallium/auxiliary/util/u_vbuf.c
> +++ b/src/gallium/auxiliary/util/u_vbuf.c
> @@ -184,6 +184,8 @@ struct u_vbuf {
>      uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
>      /* Which buffer has a non-zero stride. */
>      uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
> +   /* Which buffers are allowed (supported by hardware). */
> +   uint32_t allowed_vb_mask;
>   };
>
>   static void *
> @@ -291,10 +293,14 @@ boolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps)
>      caps->user_vertex_buffers =
>         screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
>
> +   caps->max_vertex_buffers =
> +      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
> +
>      if (!caps->buffer_offset_unaligned ||
>          !caps->buffer_stride_unaligned ||
>          !caps->velem_src_offset_unaligned ||
> -       !caps->user_vertex_buffers) {
> +       !caps->user_vertex_buffers ||
> +       !caps->max_vertex_buffers) {
>         fallback = TRUE;
>      }
>
> @@ -313,6 +319,7 @@ u_vbuf_create(struct pipe_context *pipe,
>      mgr->cso_cache = cso_cache_create();
>      mgr->translate_cache = translate_cache_create();
>      memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
> +   mgr->allowed_vb_mask = (1 << mgr->caps.max_vertex_buffers) - 1;

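This is undefined behavior when max_vertex_buffers is 31 or 32: 1 << 31
overflows a signed int, and shifting a 32-bit value by 32 is undefined.
You can use u_bit_consecutive(0, mgr->caps.max_vertex_buffers) instead.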

>
>      mgr->uploader = u_upload_create(pipe, 1024 * 1024,
>                                      PIPE_BIND_VERTEX_BUFFER,
> @@ -523,14 +530,15 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
>
>   static boolean
>   u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
> -                                    unsigned mask[VB_NUM])
> +                                    unsigned mask[VB_NUM],
> +                                    unsigned extra_free_vb_mask)
>   {
>      unsigned type;
>      unsigned fallback_vbs[VB_NUM];
>      /* Set the bit for each buffer which is incompatible, or isn't set. */
>      uint32_t unused_vb_mask =
> -      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
> -      ~mgr->enabled_vb_mask;
> +      (mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
> +      ~mgr->enabled_vb_mask | extra_free_vb_mask) & mgr->allowed_vb_mask;
>
>      memset(fallback_vbs, ~0, sizeof(fallback_vbs));
>
> @@ -573,6 +581,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
>      unsigned i, type;
>      unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
>                                      mgr->ve->used_vb_mask;
> +   unsigned extra_free_vb_mask = 0;
>
>      int start[VB_NUM] = {
>         start_vertex,     /* VERTEX */
> @@ -618,8 +627,15 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
>
>      assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
>
> +   /* In the case of unroll_indices, we can regard all non-constant
> +    * vertex buffers with only non-instance vertex elements as incompatible
> +    * and thus free.
> +    */
> +   if (unroll_indices)
> +       extra_free_vb_mask = mask[VB_VERTEX] & ~mask[VB_INSTANCE];
> +
>      /* Find free vertex buffer slots. */
> -   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
> +   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask, extra_free_vb_mask)) {
>         return FALSE;
>      }

This logic of using extra space in the unroll_indices case looks
unrelated to the caps->max_vertex_buffers change. Can you put it into a
separate patch?

>
> @@ -778,6 +794,17 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
>         }
>      }
>
> +   if (used_buffers & ~mgr->allowed_vb_mask) {
> +      /* More vertex buffers are used than the hardware supports.  In
> +       * principle, we only need to make sure that less vertex buffers are
> +       * used, and mark some of the latter vertex buffers as incompatible.
> +       * For now, mark all vertex buffers as incompatible.
> +       */
> +      ve->incompatible_vb_mask_any = used_buffers;
> +      ve->compatible_vb_mask_any = 0;
> +      ve->incompatible_elem_mask = (1 << count) - 1;
> +   }
> +
>      ve->used_vb_mask = used_buffers;
>      ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
>      ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
> @@ -790,8 +817,12 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
>         }
>      }
>
> -   ve->driver_cso =
> -      pipe->create_vertex_elements_state(pipe, count, driver_attribs);
> +   /* Only create driver CSO if no incompatible elements */
> +   if (!ve->incompatible_elem_mask) {
> +      ve->driver_cso =
> +         pipe->create_vertex_elements_state(pipe, count, driver_attribs);
> +   }
> +

This looks like a logically separate change. Can you put it into a
separate patch?

Cheers,
Nicolai

>      return ve;
>   }
>
> diff --git a/src/gallium/auxiliary/util/u_vbuf.h b/src/gallium/auxiliary/util/u_vbuf.h
> index 9e8b135..9ff9938 100644
> --- a/src/gallium/auxiliary/util/u_vbuf.h
> +++ b/src/gallium/auxiliary/util/u_vbuf.h
> @@ -52,6 +52,9 @@ struct u_vbuf_caps {
>
>      /* Whether the driver supports user vertex buffers. */
>      unsigned user_vertex_buffers:1;
> +
> +   /* Maximum number of vertex buffers */
> +   unsigned max_vertex_buffers:6;
>   };
>
>
>