<div dir="ltr">Hi Dave,<div><br></div><div>This commit is causing some GPU hangs for us, on Polaris cards at least (470 and 480).</div><div><br></div><div>It also causes hangs in Dota 2 (on the initial logo screen at startup).</div><div><br></div><div>Seems to be caused by the change to set the <span style="font-size:12.800000190734863px">SPI_PS_INPUT_CNTL registers with </span><span style="font-size:12.800000190734863px">radeon_set_context_reg_seq - changing that to use a separate </span><span style="font-size:12.800000190734863px">radeon_set_context_reg call for each register avoids the hangs here, which seems... odd.</span></div><div><span style="font-size:12.800000190734863px"><br></span></div><div><span style="font-size:12.800000190734863px">Any idea?</span></div><div><span style="font-size:12.800000190734863px"><br></span></div><div><span style="font-size:12.800000190734863px">Thanks,</span></div><div><span style="font-size:12.800000190734863px">Alex<br></span><div class="gmail_extra"><br><div class="gmail_quote">On 28 March 2017 at 02:52, Dave Airlie <span dir="ltr">&lt;<a href="mailto:airlied@gmail.com" target="_blank">airlied@gmail.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">From: Dave Airlie &lt;<a href="mailto:airlied@redhat.com" target="_blank">airlied@redhat.com</a>&gt;<br>
<br>
There is no need to calculate this on each command submit.<br>
<br>
Signed-off-by: Dave Airlie &lt;<a href="mailto:airlied@redhat.com" target="_blank">airlied@redhat.com</a>&gt;<br>
---<br>
 src/amd/vulkan/radv_cmd_buffe<wbr>r.c | 67 +++---------------------------<wbr>-------<br>
 src/amd/vulkan/radv_pipeline.<wbr>c   | 71 ++++++++++++++++++++++++++++++<wbr>++++++++++<br>
 src/amd/vulkan/radv_private.<wbr>h    |  2 ++<br>
 3 files changed, 77 insertions(+), 63 deletions(-)<br>
<br>
diff --git a/src/amd/vulkan/radv_cmd_buff<wbr>er.c b/src/amd/vulkan/radv_cmd_buff<wbr>er.c<br>
index bdb6e2a..7eb5f80 100644<br>
--- a/src/amd/vulkan/radv_cmd_buff<wbr>er.c<br>
+++ b/src/amd/vulkan/radv_cmd_buff<wbr>er.c<br>
@@ -681,18 +681,13 @@ radv_emit_fragment_shader(stru<wbr>ct radv_cmd_buffer *cmd_buffer,<br>
                          struct radv_pipeline *pipeline)<br>
 {<br>
        struct radeon_winsys *ws = cmd_buffer->device->ws;<br>
-       struct radv_shader_variant *ps, *vs;<br>
+       struct radv_shader_variant *ps;<br>
        uint64_t va;<br>
        unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1<wbr>);<br>
        struct radv_blend_state *blend = &pipeline->graphics.blend;<br>
-       unsigned ps_offset = 0;<br>
-       struct ac_vs_output_info *outinfo;<br>
        assert (pipeline->shaders[MESA_SHADER<wbr>_FRAGMENT]);<br>
<br>
        ps = pipeline->shaders[MESA_SHADER_<wbr>FRAGMENT];<br>
-       vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_<wbr>VERTEX];<br>
-<br>
-       outinfo = &vs->info.vs.outinfo;<br>
<br>
        va = ws->buffer_get_va(ps->bo);<br>
        ws->cs_add_buffer(cmd_buffer-><wbr>cs, ps->bo, 8);<br>
@@ -728,63 +723,9 @@ radv_emit_fragment_shader(stru<wbr>ct radv_cmd_buffer *cmd_buffer,<br>
        radeon_set_context_reg(cmd_buf<wbr>fer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);<br>
        radeon_set_context_reg(cmd_buf<wbr>fer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);<br>
<br>
-       if (ps->info.fs.has_pcoord) {<br>
-               unsigned val;<br>
-               val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);<br>
-               radeon_set_context_reg(cmd_bu<wbr>ffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);<br>
-               ps_offset++;<br>
-       }<br>
-<br>
-       if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {<br>
-               unsigned vs_offset, flat_shade;<br>
-               unsigned val;<br>
-               vs_offset = outinfo->prim_id_output;<br>
-               flat_shade = true;<br>
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade<wbr>);<br>
-               radeon_set_context_reg(cmd_bu<wbr>ffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);<br>
-               ++ps_offset;<br>
-       }<br>
-<br>
-       if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {<br>
-               unsigned vs_offset, flat_shade;<br>
-               unsigned val;<br>
-               vs_offset = outinfo->layer_output;<br>
-               flat_shade = true;<br>
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade<wbr>);<br>
-               radeon_set_context_reg(cmd_bu<wbr>ffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);<br>
-               ++ps_offset;<br>
-       }<br>
-<br>
-       for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {<br>
-               unsigned vs_offset, flat_shade;<br>
-               unsigned val;<br>
-<br>
-               if (!(ps->info.fs.input_mask & (1u << i)))<br>
-                       continue;<br>
-<br>
-<br>
-               if (!(outinfo->export_mask & (1u << i))) {<br>
-                       radeon_set_context_reg(cmd_bu<wbr>ffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset,<br>
-                                              S_028644_OFFSET(0x20));<br>
-                       ++ps_offset;<br>
-                       continue;<br>
-               }<br>
-<br>
-               vs_offset = util_bitcount(outinfo->export_<wbr>mask & ((1u << i) - 1));<br>
-               if (outinfo->prim_id_output != 0xffffffff) {<br>
-                       if (vs_offset >= outinfo->prim_id_output)<br>
-                               vs_offset++;<br>
-               }<br>
-               if (outinfo->layer_output != 0xffffffff) {<br>
-                       if (vs_offset >= outinfo->layer_output)<br>
-                         vs_offset++;<br>
-               }<br>
-               flat_shade = !!(ps->info.fs.flat_shaded_mas<wbr>k & (1u << ps_offset));<br>
-<br>
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade<wbr>);<br>
-               radeon_set_context_reg(cmd_bu<wbr>ffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);<br>
-               ++ps_offset;<br>
-       }<br>
+       radeon_set_context_reg_seq(cm<wbr>d_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cn<wbr>tl_num);<br>
+       for (unsigned i = 0; i < pipeline->graphics.ps_input_cn<wbr>tl_num; i++)<br>
+               radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cn<wbr>tl[i]);<br>
 }<br>
<br>
 static void<br>
diff --git a/src/amd/vulkan/radv_pipeline<wbr>.c b/src/amd/vulkan/radv_pipeline<wbr>.c<br>
index 550b773..c7d7480 100644<br>
--- a/src/amd/vulkan/radv_pipeline<wbr>.c<br>
+++ b/src/amd/vulkan/radv_pipeline<wbr>.c<br>
@@ -1527,6 +1527,76 @@ static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)<br>
               S_028A40_GS_WRITE_OPTIMIZE(1)<wbr>;<br>
 }<br>
<br>
+static void calculate_ps_inputs(struct radv_pipeline *pipeline)<br>
+{<br>
+       struct radv_shader_variant *ps, *vs;<br>
+       struct ac_vs_output_info *outinfo;<br>
+<br>
+       ps = pipeline->shaders[MESA_SHADER_<wbr>FRAGMENT];<br>
+       vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_<wbr>VERTEX];<br>
+<br>
+       outinfo = &vs->info.vs.outinfo;<br>
+<br>
+       unsigned ps_offset = 0;<br>
+       if (ps->info.fs.has_pcoord) {<br>
+               unsigned val;<br>
+               val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);<br>
+               pipeline->graphics.ps_input_c<wbr>ntl[ps_offset] = val;<br>
+               ps_offset++;<br>
+       }<br>
+<br>
+       if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {<br>
+               unsigned vs_offset, flat_shade;<br>
+               unsigned val;<br>
+               vs_offset = outinfo->prim_id_output;<br>
+               flat_shade = true;<br>
+               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade<wbr>);<br>
+               pipeline->graphics.ps_input_c<wbr>ntl[ps_offset] = val;<br>
+               ++ps_offset;<br>
+       }<br>
+<br>
+       if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {<br>
+               unsigned vs_offset, flat_shade;<br>
+               unsigned val;<br>
+               vs_offset = outinfo->layer_output;<br>
+               flat_shade = true;<br>
+               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade<wbr>);<br>
+               pipeline->graphics.ps_input_c<wbr>ntl[ps_offset] = val;<br>
+               ++ps_offset;<br>
+       }<br>
+<br>
+       for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {<br>
+               unsigned vs_offset, flat_shade;<br>
+               unsigned val;<br>
+<br>
+               if (!(ps->info.fs.input_mask & (1u << i)))<br>
+                       continue;<br>
+<br>
+               if (!(outinfo->export_mask & (1u << i))) {<br>
+                       pipeline->graphics.ps_input_c<wbr>ntl[ps_offset] = S_028644_OFFSET(0x20);<br>
+                       ++ps_offset;<br>
+                       continue;<br>
+               }<br>
+<br>
+               vs_offset = util_bitcount(outinfo->export_<wbr>mask & ((1u << i) - 1));<br>
+               if (outinfo->prim_id_output != 0xffffffff) {<br>
+                       if (vs_offset >= outinfo->prim_id_output)<br>
+                               vs_offset++;<br>
+               }<br>
+               if (outinfo->layer_output != 0xffffffff) {<br>
+                       if (vs_offset >= outinfo->layer_output)<br>
+                         vs_offset++;<br>
+               }<br>
+               flat_shade = !!(ps->info.fs.flat_shaded_mas<wbr>k & (1u << ps_offset));<br>
+<br>
+               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade<wbr>);<br>
+               pipeline->graphics.ps_input_c<wbr>ntl[ps_offset] = val;<br>
+               ++ps_offset;<br>
+       }<br>
+<br>
+       pipeline->graphics.ps_input_c<wbr>ntl_num = ps_offset;<br>
+}<br>
+<br>
 VkResult<br>
 radv_pipeline_init(struct radv_pipeline *pipeline,<br>
                   struct radv_device *device,<br>
@@ -1672,6 +1742,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,<br>
                ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :<br>
                V_028710_SPI_SHADER_ZERO;<br>
<br>
+       calculate_ps_inputs(pipeline)<wbr>;<br>
        const VkPipelineVertexInputStateCrea<wbr>teInfo *vi_info =<br>
                pCreateInfo->pVertexInputState<wbr>;<br>
        for (uint32_t i = 0; i < vi_info->vertexAttributeDescri<wbr>ptionCount; i++) {<br>
diff --git a/src/amd/vulkan/radv_private.<wbr>h b/src/amd/vulkan/radv_private.<wbr>h<br>
index dff0aef..bf3d19c 100644<br>
--- a/src/amd/vulkan/radv_private.<wbr>h<br>
+++ b/src/amd/vulkan/radv_private.<wbr>h<br>
@@ -963,6 +963,8 @@ struct radv_pipeline {<br>
                        bool prim_restart_enable;<br>
                        unsigned esgs_ring_size;<br>
                        unsigned gsvs_ring_size;<br>
+                       uint32_t ps_input_cntl[32];<br>
+                       uint32_t ps_input_cntl_num;<br>
                        struct radv_prim_vertex_count prim_vertex_count;<br>
                } graphics;<br>
        };<br>
<span class="gmail-m_5504090767150618807HOEnZb"><font color="#888888">--<br>
2.9.3<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div></div>