<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Thu, Oct 12, 2017 at 11:38 AM, Jose Maria Casanova Crespo <span dir="ltr"><<a href="mailto:jmcasanova@igalia.com" target="_blank">jmcasanova@igalia.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">We enable the use of 16-bit values in push constants<br>
modifying the assign_constant_locations function to work<br>
with 16-bit types.<br>
<br>
The API to access buffers in Vulkan use multiples of 4-byte for<br>
offsets and sizes. Current accountability of uniforms based on 4-byte<br>
slots will work for 16-bit values if they are allowed to use 32-bit<br>
slots. For that, we replace the division by 4 by a DIV_ROUND_UP, so<br>
2-byte elements will use 1 slot instead of 0.<br></blockquote><div><br></div><div>I'm fairly sure this doesn't actually work correctly.  That said, I'm also fairly sure the current code is broken for 64 bits.  In particular, let's suppose we have something like this:</div><div><br></div><div>layout(push_constant) {</div><div>    struct {</div><div>        int i;</div><div>        int pad1;<br></div><div>        float f;</div><div>        double d;</div><div>        int pad2;<br></div><div>    } arr[2];</div><div>}<br></div><div><br></div><div>main()</div><div>{</div><div>    out_color = vec4(arr[arr[0].i].f, float(arr[arr[0].i].d), arr[arr[1].i].f, float(arr[arr[1].i].d));<br></div><div>}</div><div><br></div><div>I'm pretty sure the current code will explode because it will see a single contiguous chunk that's neither 32 nor 64-bit.  If that particular shader doesn't break it, I'm sure some permutation of it will.  Things only get worse if we throw in 16-bit.</div><div><br></div><div>Ultimately, I think the solution is to throw away our current scheme of trying to separate things out by bit size and move to a scheme where we work with everything in bytes in assign_constant_locations and trust in the contiguous[] array to determine where to split things up.  We would probably want to continue only re-arranging things 4 bytes at a time.</div><div><br></div><div>That said, I don't think this patch makes anything any worse...<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
We aligns the 16-bit locations after assigning the 32-bit<br>
ones.<br>
---<br>
 src/intel/compiler/brw_fs.cpp | 30 +++++++++++++++++++++++-------<br>
 1 file changed, 23 insertions(+), 7 deletions(-)<br>
<br>
diff --git a/src/intel/compiler/brw_fs.<wbr>cpp b/src/intel/compiler/brw_fs.<wbr>cpp<br>
index a1d49a63be..8da16145dc 100644<br>
--- a/src/intel/compiler/brw_fs.<wbr>cpp<br>
+++ b/src/intel/compiler/brw_fs.<wbr>cpp<br>
@@ -1909,8 +1909,9 @@ set_push_pull_constant_loc(<wbr>unsigned uniform, int *chunk_start,<br>
    if (!contiguous) {<br>
       /* If bitsize doesn't match the target one, skip it */<br>
       if (*max_chunk_bitsize != target_bitsize) {<br>
-         /* FIXME: right now we only support 32 and 64-bit accesses */<br>
-         assert(*max_chunk_bitsize == 4 || *max_chunk_bitsize == 8);<br>
+         assert(*max_chunk_bitsize == 4 ||<br>
+                *max_chunk_bitsize == 8 ||<br>
+                *max_chunk_bitsize == 2);<br>
          *max_chunk_bitsize = 0;<br>
          *chunk_start = -1;<br>
          return;<br>
@@ -1987,8 +1988,9 @@ fs_visitor::assign_constant_<wbr>locations()<br>
          int constant_nr = inst->src[i].nr + inst->src[i].offset / 4;<br>
<br>
          if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {<br>
-            assert(inst->src[2].ud % 4 == 0);<br>
-            unsigned last = constant_nr + (inst->src[2].ud / 4) - 1;<br>
+            assert(type_sz(inst->src[i].<wbr>type) == 2 ?<br>
+                   (inst->src[2].ud % 2 == 0) : (inst->src[2].ud % 4 == 0));<br>
+            unsigned last = constant_nr + DIV_ROUND_UP(inst->src[2].ud, 4) - 1;<br>
             assert(last < uniforms);<br>
<br>
             for (unsigned j = constant_nr; j < last; j++) {<br>
@@ -2000,8 +2002,8 @@ fs_visitor::assign_constant_<wbr>locations()<br>
             bitsize_access[last] = MAX2(bitsize_access[last], type_sz(inst->src[i].type));<br>
          } else {<br>
             if (constant_nr >= 0 && constant_nr < (int) uniforms) {<br>
-               int regs_read = inst->components_read(i) *<br>
-                  type_sz(inst->src[i].type) / 4;<br>
+               int regs_read = DIV_ROUND_UP(inst->components_<wbr>read(i) *<br>
+                                            type_sz(inst->src[i].type), 4);<br>
                for (int j = 0; j < regs_read; j++) {<br>
                   is_live[constant_nr + j] = true;<br>
                   bitsize_access[constant_nr + j] =<br>
@@ -2062,7 +2064,7 @@ fs_visitor::assign_constant_<wbr>locations()<br>
<br>
    }<br>
<br>
-   /* Then push the rest of uniforms */<br>
+   /* Then push the 32-bit uniforms */<br>
    const unsigned uniform_32_bit_size = type_sz(BRW_REGISTER_TYPE_F);<br>
    for (unsigned u = 0; u < uniforms; u++) {<br>
       if (!is_live[u])<br>
@@ -2081,6 +2083,20 @@ fs_visitor::assign_constant_<wbr>locations()<br>
                                  stage_prog_data);<br>
    }<br>
<br>
+   const unsigned uniform_16_bit_size = type_sz(BRW_REGISTER_TYPE_HF);<br>
+   for (unsigned u = 0; u < uniforms; u++) {<br>
+      if (!is_live[u])<br>
+         continue;<br>
+<br>
+      set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,<br>
+                                 contiguous[u], bitsize_access[u],<br>
+                                 uniform_16_bit_size,<br>
+                                 push_constant_loc, pull_constant_loc,<br>
+                                 &num_push_constants, &num_pull_constants,<br>
+                                 max_push_components, max_chunk_size,<br>
+                                 stage_prog_data);<br>
+   }<br>
+<br>
    /* Add the CS local thread ID uniform at the end of the push constants */<br>
    if (thread_local_id_index >= 0)<br>
       push_constant_loc[thread_<wbr>local_id_index] = num_push_constants++;<br>
<span class="gmail-HOEnZb"><font color="#888888">--<br>
2.13.6<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>