Mesa (main): gallium: add PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Nov 3 20:33:06 UTC 2021


Module: Mesa
Branch: main
Commit: 8297d243fb3bc8c7f821c58cf50cb8c2b5e9263f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8297d243fb3bc8c7f821c58cf50cb8c2b5e9263f

Author: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Date:   Wed Oct 27 14:28:53 2021 -0400

gallium: add PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY

Vulkan requires that vertex attribute access be aligned to the size of
a component for the attribute, but GL has no such requirement.

The existing alignment caps are unnecessarily restrictive for applying
this limitation, so this cap now pre-calculates the masks for elements
and vertex buffers in vbuf to enable rewriting misaligned buffers.

Acked-by: Marek Olšák <marek.olsak at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13556>

---

 docs/gallium/screen.rst               |  6 +++
 src/gallium/auxiliary/util/u_screen.c |  1 +
 src/gallium/auxiliary/util/u_vbuf.c   | 76 ++++++++++++++++++++++++++++++-----
 src/gallium/auxiliary/util/u_vbuf.h   |  1 +
 src/gallium/include/pipe/p_defines.h  |  1 +
 5 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/docs/gallium/screen.rst b/docs/gallium/screen.rst
index b8f3c2fcf54..fa8e7f2cd2d 100644
--- a/docs/gallium/screen.rst
+++ b/docs/gallium/screen.rst
@@ -139,6 +139,12 @@ The integer capabilities:
 * ``PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY``: This CAP describes
   a hw limitation.  If true, pipe_vertex_element::src_offset must always be
   aligned to 4.  If false, there are no restrictions on src_offset.
+* ``PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY``: This CAP describes
+  a hw limitation.  If true, the sum of
+  ``pipe_vertex_element::src_offset + pipe_vertex_buffer::buffer_offset + pipe_vertex_buffer::stride``
+  must always be aligned to the component size for the vertex attributes
+  which access that buffer.  If false, there are no restrictions on these values.
+  This CAP cannot be used with any other alignment-requiring CAPs.
 * ``PIPE_CAP_COMPUTE``: Whether the implementation supports the
   compute entry points defined in pipe_context and pipe_screen.
 * ``PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT``: Describes the required
diff --git a/src/gallium/auxiliary/util/u_screen.c b/src/gallium/auxiliary/util/u_screen.c
index eba554600fd..c7e26d54da4 100644
--- a/src/gallium/auxiliary/util/u_screen.c
+++ b/src/gallium/auxiliary/util/u_screen.c
@@ -128,6 +128,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen,
    case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:
    case PIPE_CAP_COMPUTE:
       return 0;
 
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index ef28c628da9..ce44a131080 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -112,6 +112,7 @@ struct u_vbuf_elements {
     * its vertex data must be translated to native_format[i]. */
    enum pipe_format native_format[PIPE_MAX_ATTRIBS];
    unsigned native_format_size[PIPE_MAX_ATTRIBS];
+   unsigned component_size[PIPE_MAX_ATTRIBS];
 
    /* Which buffers are used by the vertex element state. */
    uint32_t used_vb_mask;
@@ -127,6 +128,7 @@ struct u_vbuf_elements {
    /* Which buffer has at least one vertex element referencing it
     * compatible. */
    uint32_t compatible_vb_mask_any;
+   uint32_t vb_align_mask[2]; //which buffers require 2/4 byte alignments
    /* Which buffer has all vertex elements referencing it compatible. */
    uint32_t compatible_vb_mask_all;
 
@@ -163,6 +165,8 @@ struct u_vbuf {
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
    uint32_t enabled_vb_mask;
 
+   uint32_t unaligned_vb_mask[2]; //16/32bit
+
    /* Vertex buffers for the driver.
     * There are usually no user buffers. */
    struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -303,6 +307,11 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
    caps->velem_src_offset_unaligned =
       !screen->get_param(screen,
                          PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
+   caps->attrib_component_unaligned =
+      !screen->get_param(screen,
+                         PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY);
+   assert(caps->attrib_component_unaligned ||
+          (caps->velem_src_offset_unaligned && caps->buffer_stride_unaligned && caps->buffer_offset_unaligned));
    caps->user_vertex_buffers =
       screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
    caps->max_vertex_buffers =
@@ -330,6 +339,7 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
 
    if (!caps->buffer_offset_unaligned ||
        !caps->buffer_stride_unaligned ||
+       !caps->attrib_component_unaligned ||
        !caps->velem_src_offset_unaligned)
       caps->fallback_always = true;
 
@@ -668,13 +678,14 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
                        const struct pipe_draw_info *info,
                        const struct pipe_draw_start_count_bias *draw,
                        int start_vertex, unsigned num_vertices,
-                       int min_index, boolean unroll_indices)
+                       int min_index, boolean unroll_indices,
+                       uint32_t misaligned)
 {
    unsigned mask[VB_NUM] = {0};
    struct translate_key key[VB_NUM];
    unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
    unsigned i, type;
-   const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
+   const unsigned incompatible_vb_mask = (misaligned | mgr->incompatible_vb_mask) &
                                          mgr->ve->used_vb_mask;
 
    const int start[VB_NUM] = {
@@ -726,6 +737,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
       return FALSE;
    }
 
+   unsigned min_alignment[VB_NUM] = {0};
    /* Initialize the translate keys. */
    for (i = 0; i < mgr->ve->count; i++) {
       struct translate_key *k;
@@ -764,15 +776,25 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
       te->input_offset = mgr->ve->ve[i].src_offset;
       te->output_format = output_format;
       te->output_offset = k->output_stride;
+      unsigned adjustment = 0;
+      if (!mgr->caps.attrib_component_unaligned &&
+          te->output_offset % mgr->ve->component_size[i] != 0) {
+         unsigned aligned = align(te->output_offset, mgr->ve->component_size[i]);
+         adjustment = aligned - te->output_offset;
+         te->output_offset = aligned;
+      }
 
-      k->output_stride += mgr->ve->native_format_size[i];
+      k->output_stride += mgr->ve->native_format_size[i] + adjustment;
       k->nr_elements++;
+      min_alignment[type] = MAX2(min_alignment[type], mgr->ve->component_size[i]);
    }
 
    /* Translate buffers. */
    for (type = 0; type < VB_NUM; type++) {
       if (key[type].nr_elements) {
          enum pipe_error err;
+         if (!mgr->caps.attrib_component_unaligned)
+            key[type].output_stride = align(key[type].output_stride, min_alignment[type]);
          err = u_vbuf_translate_buffers(mgr, &key[type], info, draw,
                                         mask[type], mgr->fallback_vbs[type],
                                         start[type], num[type], min_index,
@@ -880,13 +902,27 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
       ve->native_format_size[i] =
             util_format_get_blocksize(ve->native_format[i]);
 
+      const struct util_format_description *desc = util_format_description(format);
+      bool is_packed = false;
+      for (unsigned c = 0; c < desc->nr_channels; c++)
+         is_packed |= desc->channel[c].size != desc->channel[0].size || desc->channel[c].size % 8 != 0;
+      unsigned component_size = is_packed ?
+                                ve->native_format_size[i] : (ve->native_format_size[i] / desc->nr_channels);
+      ve->component_size[i] = component_size;
+
       if (ve->ve[i].src_format != format ||
           (!mgr->caps.velem_src_offset_unaligned &&
-           ve->ve[i].src_offset % 4 != 0)) {
+           ve->ve[i].src_offset % 4 != 0) ||
+          (!mgr->caps.attrib_component_unaligned &&
+           ve->ve[i].src_offset % component_size != 0)) {
          ve->incompatible_elem_mask |= 1 << i;
          ve->incompatible_vb_mask_any |= vb_index_bit;
       } else {
          ve->compatible_vb_mask_any |= vb_index_bit;
+         if (component_size == 2)
+            ve->vb_align_mask[0] |= vb_index_bit;
+         else if (component_size == 4)
+            ve->vb_align_mask[1] |= vb_index_bit;
       }
    }
 
@@ -958,6 +994,8 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
    mgr->incompatible_vb_mask &= mask;
    mgr->nonzero_stride_vb_mask &= mask;
    mgr->enabled_vb_mask &= mask;
+   mgr->unaligned_vb_mask[0] &= mask;
+   mgr->unaligned_vb_mask[1] &= mask;
 
    if (!bufs) {
       struct pipe_context *pipe = mgr->pipe;
@@ -1011,6 +1049,13 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
          continue;
       }
 
+      if (!mgr->caps.attrib_component_unaligned) {
+         if (vb->buffer_offset % 2 != 0 || vb->stride % 2 != 0)
+            mgr->unaligned_vb_mask[0] |= BITFIELD_BIT(dst_index);
+         if (vb->buffer_offset % 4 != 0 || vb->stride % 4 != 0)
+            mgr->unaligned_vb_mask[1] |= BITFIELD_BIT(dst_index);
+      }
+
       if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
          user_vb_mask |= 1 << dst_index;
          real_vb->buffer_offset = vb->buffer_offset;
@@ -1183,7 +1228,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
    return PIPE_OK;
 }
 
-static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
+static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr, uint32_t misaligned)
 {
    /* See if there are any per-vertex attribs which will be uploaded or
     * translated. Use bitmasks to get the info instead of looping over vertex
@@ -1191,12 +1236,13 @@ static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
    return (mgr->ve->used_vb_mask &
            ((mgr->user_vb_mask |
              mgr->incompatible_vb_mask |
+             misaligned |
              mgr->ve->incompatible_vb_mask_any) &
             mgr->ve->noninstance_vb_mask_any &
             mgr->nonzero_stride_vb_mask)) != 0;
 }
 
-static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
+static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr, uint32_t misaligned)
 {
    /* Return true if there are hw buffers which don't need to be translated.
     *
@@ -1205,6 +1251,7 @@ static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
    return (mgr->ve->used_vb_mask &
            (~mgr->user_vb_mask &
             ~mgr->incompatible_vb_mask &
+            ~misaligned &
             mgr->ve->compatible_vb_mask_all &
             mgr->ve->noninstance_vb_mask_any &
             mgr->nonzero_stride_vb_mask)) != 0;
@@ -1390,12 +1437,19 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
    boolean unroll_indices = FALSE;
    const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
    uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
-   const uint32_t incompatible_vb_mask =
-      mgr->incompatible_vb_mask & used_vb_mask;
    struct pipe_draw_info new_info;
    struct pipe_draw_start_count_bias new_draw;
    unsigned fixed_restart_index = info->index_size ? util_prim_restart_index_from_size(info->index_size) : 0;
 
+   uint32_t misaligned = 0;
+   if (!mgr->caps.attrib_component_unaligned) {
+      for (unsigned i = 0; i < ARRAY_SIZE(mgr->unaligned_vb_mask); i++) {
+         misaligned |= mgr->ve->vb_align_mask[i] & mgr->unaligned_vb_mask[i];
+      }
+   }
+   const uint32_t incompatible_vb_mask =
+      (mgr->incompatible_vb_mask | misaligned) & used_vb_mask;
+
    /* Normal draw. No fallback and no user buffers. */
    if (!incompatible_vb_mask &&
        !mgr->ve->incompatible_elem_mask &&
@@ -1583,7 +1637,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
 
    if (new_info.index_size) {
       /* See if anything needs to be done for per-vertex attribs. */
-      if (u_vbuf_need_minmax_index(mgr)) {
+      if (u_vbuf_need_minmax_index(mgr, misaligned)) {
          unsigned max_index;
 
          if (new_info.index_bounds_valid) {
@@ -1606,7 +1660,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
          if (!indirect &&
              !new_info.primitive_restart &&
              util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) &&
-             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
+             !u_vbuf_mapping_vertex_buffer_blocks(mgr, misaligned)) {
             unroll_indices = TRUE;
             user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                               mgr->ve->noninstance_vb_mask_any);
@@ -1629,7 +1683,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
        mgr->ve->incompatible_elem_mask) {
       if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw,
                                   start_vertex, num_vertices,
-                                  min_index, unroll_indices)) {
+                                  min_index, unroll_indices, misaligned)) {
          debug_warn_once("u_vbuf_translate_begin() failed");
          goto cleanup;
       }
diff --git a/src/gallium/auxiliary/util/u_vbuf.h b/src/gallium/auxiliary/util/u_vbuf.h
index 5cefac56700..b0bd06bc979 100644
--- a/src/gallium/auxiliary/util/u_vbuf.h
+++ b/src/gallium/auxiliary/util/u_vbuf.h
@@ -50,6 +50,7 @@ struct u_vbuf_caps {
    unsigned buffer_offset_unaligned:1;
    unsigned buffer_stride_unaligned:1;
    unsigned velem_src_offset_unaligned:1;
+   unsigned attrib_component_unaligned:1;
 
    /* Whether the driver supports user vertex buffers. */
    unsigned user_vertex_buffers:1;
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 5c009017ac5..b236d50fb45 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -792,6 +792,7 @@ enum pipe_cap
    PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY,
    PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY,
    PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY,
+   PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY,
    PIPE_CAP_COMPUTE,
    PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT,
    PIPE_CAP_START_INSTANCE,



More information about the mesa-commit mailing list