[Mesa-dev] [RFC 2/3] u_vbuf: add logic to use a limited number of vbufs

Tue Jun 14 10:13:13 UTC 2016

Hi Nicolai,

2016-06-13 11:48 GMT+02:00 Nicolai Hähnle <nhaehnle at gmail.com>:
> On 11.06.2016 21:21, Christian Gmeiner wrote:
>>
>> From: "Wladimir J. van der Laan" <laanwj at gmail.com>
>>
>> Make it possible to limit the number of vertex buffers, as there exist
>> GPUs with fewer than 32 supported vertex buffers.
>>
>> Signed-off-by: Wladimir J. van der Laan <laanwj at gmail.com>
>> ---
>>   src/gallium/auxiliary/util/u_vbuf.c | 45
>> +++++++++++++++++++++++++++++++------
>>   src/gallium/auxiliary/util/u_vbuf.h |  3 +++
>>   2 files changed, 41 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/util/u_vbuf.c
>> b/src/gallium/auxiliary/util/u_vbuf.c
>> index 5b4e527..464c279 100644
>> --- a/src/gallium/auxiliary/util/u_vbuf.c
>> +++ b/src/gallium/auxiliary/util/u_vbuf.c
>> @@ -184,6 +184,8 @@ struct u_vbuf {
>>      uint32_t incompatible_vb_mask; /* each bit describes a corresp.
>> buffer */
>>      /* Which buffer has a non-zero stride. */
>>      uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp.
>> buffer */
>> +   /* Which buffers are allowed (supported by hardware). */
>> +   uint32_t allowed_vb_mask;
>>   };
>>
>>   static void *
>> @@ -291,10 +293,14 @@ boolean u_vbuf_get_caps(struct pipe_screen *screen,
>> struct u_vbuf_caps *caps)
>>      caps->user_vertex_buffers =
>>         screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
>>
>> +   caps->max_vertex_buffers =
>> +      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
>> +
>>      if (!caps->buffer_offset_unaligned ||
>>          !caps->buffer_stride_unaligned ||
>>          !caps->velem_src_offset_unaligned ||
>> -       !caps->user_vertex_buffers) {
>> +       !caps->user_vertex_buffers ||
>> +       !caps->max_vertex_buffers) {
>>         fallback = TRUE;
>>      }
>>
>> @@ -313,6 +319,7 @@ u_vbuf_create(struct pipe_context *pipe,
>>      mgr->cso_cache = cso_cache_create();
>>      mgr->translate_cache = translate_cache_create();
>>      memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
>> +   mgr->allowed_vb_mask = (1 << mgr->caps.max_vertex_buffers) - 1;
>
>
> This is undefined when max_vertex_buffers is 31 or 32. You can use
> u_bit_consecutive.

Ok.

>
>
>>
>>      mgr->uploader = u_upload_create(pipe, 1024 * 1024,
>>                                      PIPE_BIND_VERTEX_BUFFER,
>> @@ -523,14 +530,15 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct
>> translate_key *key,
>>
>>   static boolean
>>   u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
>> -                                    unsigned mask[VB_NUM])
>> +                                    unsigned mask[VB_NUM],
>> +                                    unsigned extra_free_vb_mask)
>>   {
>>      unsigned type;
>>      unsigned fallback_vbs[VB_NUM];
>>      /* Set the bit for each buffer which is incompatible, or isn't set.
>> */
>>      uint32_t unused_vb_mask =
>> -      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
>> -      ~mgr->enabled_vb_mask;
>> +      (mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
>> +      ~mgr->enabled_vb_mask | extra_free_vb_mask) & mgr->allowed_vb_mask;
>>
>>      memset(fallback_vbs, ~0, sizeof(fallback_vbs));
>>
>> @@ -573,6 +581,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
>>      unsigned i, type;
>>      unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
>>                                      mgr->ve->used_vb_mask;
>> +   unsigned extra_free_vb_mask = 0;
>>
>>      int start[VB_NUM] = {
>>         start_vertex,     /* VERTEX */
>> @@ -618,8 +627,15 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
>>
>>      assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
>>
>> +   /* In the case of unroll_indices, we can regard all non-constant
>> +    * vertex buffers with only non-instance vertex elements as
>> incompatible
>> +    * and thus free.
>> +    */
>> +   if (unroll_indices)
>> +       extra_free_vb_mask = mask[VB_VERTEX] & ~mask[VB_INSTANCE];
>> +
>>      /* Find free vertex buffer slots. */
>> -   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
>> +   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask,
>> extra_free_vb_mask)) {
>>         return FALSE;
>>      }
>
>
> This logic of using extra space in case of unroll_indices looks unrelated to
> the caps->max_vertex_buffers stuff, can you put it into a separate patch?
>

Yes, that makes sense.

>>
>> @@ -778,6 +794,17 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr,
>> unsigned count,
>>         }
>>      }
>>
>> +   if (used_buffers & ~mgr->allowed_vb_mask) {
>> +      /* More vertex buffers are used than the hardware supports.  In
>> +       * principle, we only need to make sure that less vertex buffers
>> are
>> +       * used, and mark some of the latter vertex buffers as
>> incompatible.
>> +       * For now, mark all vertex buffers as incompatible.
>> +       */
>> +      ve->incompatible_vb_mask_any = used_buffers;
>> +      ve->compatible_vb_mask_any = 0;
>> +      ve->incompatible_elem_mask = (1 << count) - 1;
>> +   }
>> +
>>      ve->used_vb_mask = used_buffers;
>>      ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any &
>> used_buffers;
>>      ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any &
>> used_buffers;
>> @@ -790,8 +817,12 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr,
>> unsigned count,
>>         }
>>      }
>>
>> -   ve->driver_cso =
>> -      pipe->create_vertex_elements_state(pipe, count, driver_attribs);
>> +   /* Only create driver CSO if no incompatible elements */
>> +   if (!ve->incompatible_elem_mask) {
>> +      ve->driver_cso =
>> +         pipe->create_vertex_elements_state(pipe, count, driver_attribs);
>> +   }
>> +
>
>
> This looks like a logically separate change, can you put it into a separate
> patch?
>

Sure.

> Cheers,
> Nicolai
>
>
>>      return ve;
>>   }
>>
>> diff --git a/src/gallium/auxiliary/util/u_vbuf.h
>> b/src/gallium/auxiliary/util/u_vbuf.h
>> index 9e8b135..9ff9938 100644
>> --- a/src/gallium/auxiliary/util/u_vbuf.h
>> +++ b/src/gallium/auxiliary/util/u_vbuf.h
>> @@ -52,6 +52,9 @@ struct u_vbuf_caps {
>>
>>      /* Whether the driver supports user vertex buffers. */
>>      unsigned user_vertex_buffers:1;
>> +
>> +   /* Maximum number of vertex buffers */
>> +   unsigned max_vertex_buffers:6;
>>   };
>>
>>
>>
>

Thanks a lot for the review.

--
Christian Gmeiner, MSc

https://soundcloud.com/christian-gmeiner