Mesa (main): dzn: Fix triangle-fan emulation

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jun 15 00:33:47 UTC 2022


Module: Mesa
Branch: main
Commit: 91f3c7a9fbf4d5324210f245010ecc84205a5563
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=91f3c7a9fbf4d5324210f245010ecc84205a5563

Author: Boris Brezillon <boris.brezillon at collabora.com>
Date:   Mon Jun 13 16:22:38 2022 +0200

dzn: Fix triangle-fan emulation

We were completely ignoring the primitive-restart case in the
index-rewrite logic used to emulate triangle fans. Unfortunately, this
case is way more complicated than a regular index rewrite:

- we need to skip all primitive-restart entries when turning the triangle
  fan into a triangle list, which implies serializing the index buffer
  rewrite procedure (at least I didn't find any clever way to parallelize
  things)
- the number of triangles can no longer be extrapolated from the number
  of indices in the original index buffer, thus forcing us to lower
  direct indexed draws into indirect draws and patching the indexCount
  value when the new index buffer is forged

Reviewed-by: Jesse Natalie <jenatali at microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16971>

---

 src/microsoft/vulkan/dzn_cmd_buffer.c | 132 ++++++++++++++----
 src/microsoft/vulkan/dzn_meta.c       |  93 ++++++++++---
 src/microsoft/vulkan/dzn_nir.c        | 255 ++++++++++++++++++++++++++++++----
 src/microsoft/vulkan/dzn_nir.h        |  26 ++++
 src/microsoft/vulkan/dzn_private.h    |  21 ++-
 5 files changed, 450 insertions(+), 77 deletions(-)

diff --git a/src/microsoft/vulkan/dzn_cmd_buffer.c b/src/microsoft/vulkan/dzn_cmd_buffer.c
index de84cc53960..037bdcd5bd2 100644
--- a/src/microsoft/vulkan/dzn_cmd_buffer.c
+++ b/src/microsoft/vulkan/dzn_cmd_buffer.c
@@ -2479,8 +2479,15 @@ dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf,
    D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
       cmdbuf->state.ib.view.BufferLocation;
 
+   ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *)
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+   ASSERTED bool prim_restart =
+      dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL;
+
+   assert(!prim_restart);
+
    enum dzn_index_type index_type =
-      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format);
+      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false);
    const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
       &device->triangle_fan[index_type];
 
@@ -2589,9 +2596,9 @@ dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf
 
 static void
 dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
-                             struct dzn_buffer *draw_buf,
+                             ID3D12Resource *draw_buf,
                              size_t draw_buf_offset,
-                             struct dzn_buffer *count_buf,
+                             ID3D12Resource *count_buf,
                              size_t count_buf_offset,
                              uint32_t max_draw_count,
                              uint32_t draw_buf_stride,
@@ -2605,6 +2612,8 @@ dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
       indexed ?
       sizeof(struct dzn_indirect_indexed_draw_params) :
       sizeof(struct dzn_indirect_draw_params);
+   bool prim_restart =
+      dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL;
 
    draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
    assert(draw_buf_stride >= min_draw_buf_stride);
@@ -2640,7 +2649,7 @@ dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
       return;
 
    D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
-      ID3D12Resource_GetGPUVirtualAddress(draw_buf->res) + draw_buf_offset;
+      ID3D12Resource_GetGPUVirtualAddress(draw_buf) + draw_buf_offset;
    ID3D12Resource *triangle_fan_index_buf = NULL;
    ID3D12Resource *triangle_fan_exec_buf = NULL;
 
@@ -2664,24 +2673,35 @@ dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
          return;
    }
 
-   struct dzn_indirect_draw_triangle_fan_rewrite_params params = {
+   struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = {
       .draw_buf_stride = draw_buf_stride,
       .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
       .triangle_fan_index_buf_start =
          triangle_fan_index_buf ?
          ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
+      .exec_buf_start =
+         prim_restart ?
+         ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0,
    };
-   uint32_t params_size =
-      triangle_fan_index_buf_stride > 0 ?
-      sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
-      sizeof(struct dzn_indirect_draw_rewrite_params);
+   uint32_t params_size;
+   if (triangle_fan_index_buf_stride > 0 && prim_restart)
+      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params);
+   else if (triangle_fan_index_buf_stride > 0)
+      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
+   else
+      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
 
    enum dzn_indirect_draw_type draw_type;
 
    if (indexed && triangle_fan_index_buf_stride > 0) {
-      draw_type = count_buf ?
-                  DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN :
-                  DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
+      if (prim_restart && count_buf)
+         draw_type =  DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
+      else if (prim_restart && !count_buf)
+         draw_type =  DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART;
+      else if (!prim_restart && count_buf)
+         draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
+      else
+         draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
    } else if (!indexed && triangle_fan_index_buf_stride > 0) {
       draw_type = count_buf ?
                   DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN :
@@ -2711,7 +2731,7 @@ dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
    if (count_buf) {
       ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                   root_param_idx++,
-                                                                  ID3D12Resource_GetGPUVirtualAddress(count_buf->res) +
+                                                                  ID3D12Resource_GetGPUVirtualAddress(count_buf) +
                                                                   count_buf_offset);
    }
 
@@ -2752,16 +2772,12 @@ dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
       },
    };
 
-   uint32_t post_barrier_count = triangle_fan_exec_buf ? 2 : 1;
-
-   ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, post_barrier_count, post_barriers);
-
    D3D12_INDEX_BUFFER_VIEW ib_view = { 0 };
 
    if (triangle_fan_exec_buf) {
       enum dzn_index_type index_type =
          indexed ?
-         dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format) :
+         dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, prim_restart) :
          DZN_NO_INDEX;
       struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
          &device->triangle_fan[index_type];
@@ -2772,6 +2788,22 @@ dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
       assert(rewrite_index->pipeline_state);
       assert(rewrite_index->cmd_sig);
 
+      D3D12_RESOURCE_BARRIER triangle_fan_exec_buf_barrier = {
+         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+          /* Transition the exec buffer to indirect arg so it can be
+           * passed to ExecuteIndirect() as an argument buffer.
+           */
+         .Transition = {
+            .pResource = triangle_fan_exec_buf,
+            .Subresource = 0,
+            .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+         },
+      };
+
+      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &triangle_fan_exec_buf_barrier);
+
       ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
       ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
       root_param_idx = 0;
@@ -2815,6 +2847,22 @@ dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
       cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
    }
 
+   D3D12_RESOURCE_BARRIER exec_buf_barrier = {
+      .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+      .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+       /* Transition the exec buffer to indirect arg so it can be
+        * passed to ExecuteIndirect() as an argument buffer.
+        */
+      .Transition = {
+         .pResource = exec_buf,
+         .Subresource = 0,
+         .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+         .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+      },
+   };
+
+   ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &exec_buf_barrier);
+
    /* We don't mess up with the driver state when executing our internal
     * compute shader, but we still change the D3D12 state, so let's mark
     * things dirty if needed.
@@ -3836,6 +3884,42 @@ dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
    const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
 
+   if (pipeline->ia.triangle_fan &&
+       dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) {
+      /* The indexed+primitive-restart+triangle-fan combination is a mess,
+       * since we have to walk the index buffer, skip entries with the
+       * special 0xffff/0xffffffff values, and push triangle list indices
+       * for the remaining values. All of this has an impact on the index
+       * count passed to the draw call, which forces us to use the indirect
+       * path.
+       */
+      struct dzn_indirect_indexed_draw_params params = {
+         .index_count = indexCount,
+         .instance_count = instanceCount,
+         .first_index = firstIndex,
+         .vertex_offset = vertexOffset,
+         .first_instance = firstInstance,
+      };
+
+      ID3D12Resource *draw_buf;
+      VkResult result =
+         dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params),
+                                           D3D12_HEAP_TYPE_UPLOAD,
+                                           D3D12_RESOURCE_STATE_GENERIC_READ,
+                                           &draw_buf);
+      if (result != VK_SUCCESS)
+         return;
+
+      void *cpu_ptr;
+      ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr);
+      memcpy(cpu_ptr, &params, sizeof(params));
+
+      ID3D12Resource_Unmap(draw_buf, 0, NULL);
+
+      dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, 0, NULL, 0, 1, sizeof(params), true);
+      return;
+   }
+
    cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
    cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
    cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
@@ -3872,7 +3956,7 @@ dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
 
-   dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, NULL, 0, drawCount, stride, false);
+   dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false);
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -3885,7 +3969,7 @@ dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
 
-   dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, NULL, 0, drawCount, stride, true);
+   dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true);
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -3901,8 +3985,8 @@ dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
    VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
 
-   dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset,
-                                count_buf, countBufferOffset,
+   dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
+                                count_buf->res, countBufferOffset,
                                 maxDrawCount, stride, false);
 }
 
@@ -3919,8 +4003,8 @@ dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
    VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
 
-   dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset,
-                                count_buf, countBufferOffset,
+   dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
+                                count_buf->res, countBufferOffset,
                                 maxDrawCount, stride, true);
 }
 
diff --git a/src/microsoft/vulkan/dzn_meta.c b/src/microsoft/vulkan/dzn_meta.c
index f75fcaf91d7..db5549f3eb6 100644
--- a/src/microsoft/vulkan/dzn_meta.c
+++ b/src/microsoft/vulkan/dzn_meta.c
@@ -116,12 +116,19 @@ dzn_meta_indirect_draw_init(struct dzn_device *device,
    bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
                        type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
                        type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
-                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
+                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
+                       type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
+                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
    bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT ||
                          type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
                          type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
-                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
+                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
+                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
+   bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
+                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
    uint32_t shader_params_size =
+      triangle_fan && prim_restart ?
+      sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params) :
       triangle_fan ?
       sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
       sizeof(struct dzn_indirect_draw_rewrite_params);
@@ -226,7 +233,7 @@ out:
    return ret;
 }
 
-#define DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT 3
+#define DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT 4
 
 static void
 dzn_meta_triangle_fan_rewrite_index_finish(struct dzn_device *device,
@@ -256,8 +263,14 @@ dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device *device,
    glsl_type_singleton_init_or_ref();
 
    uint8_t old_index_size = dzn_index_size(old_index_type);
+   bool prim_restart =
+      old_index_type == DZN_INDEX_2B_WITH_PRIM_RESTART ||
+      old_index_type == DZN_INDEX_4B_WITH_PRIM_RESTART;
 
-   nir_shader *nir = dzn_nir_triangle_fan_rewrite_index_shader(old_index_size);
+   nir_shader *nir =
+      prim_restart ?
+      dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(old_index_size) :
+      dzn_nir_triangle_fan_rewrite_index_shader(old_index_size);
 
    uint32_t root_param_count = 0;
    D3D12_ROOT_PARAMETER1 root_params[DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT];
@@ -272,12 +285,17 @@ dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device *device,
       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
    };
 
+   uint32_t params_size =
+      prim_restart ?
+      sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) :
+      sizeof(struct dzn_triangle_fan_rewrite_index_params);
+
    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
       .Constants = {
          .ShaderRegister = 0,
          .RegisterSpace = 0,
-         .Num32BitValues = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4,
+         .Num32BitValues = params_size / 4,
       },
       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
    };
@@ -294,6 +312,18 @@ dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device *device,
       };
    }
 
+   if (prim_restart) {
+      root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
+         .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
+         .Descriptor = {
+            .ShaderRegister = 3,
+            .RegisterSpace = 0,
+            .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
+         },
+         .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
+      };
+   }
+
    assert(root_param_count <= ARRAY_SIZE(root_params));
 
    D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
@@ -309,29 +339,48 @@ dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device *device,
       .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
    };
 
-   D3D12_INDIRECT_ARGUMENT_DESC cmd_args[] = {
-      {
+   uint32_t cmd_arg_count = 0;
+   D3D12_INDIRECT_ARGUMENT_DESC cmd_args[4];
+
+   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
+      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW,
+      .UnorderedAccessView = {
+         .RootParameterIndex = 0,
+      },
+   };
+
+   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
+      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
+      .Constant = {
+         .RootParameterIndex = 1,
+         .DestOffsetIn32BitValues = 0,
+         .Num32BitValuesToSet = params_size / 4,
+      },
+   };
+
+   if (prim_restart) {
+      cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW,
          .UnorderedAccessView = {
-            .RootParameterIndex = 0,
+            .RootParameterIndex = 3,
          },
-      },
-      {
-         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
-         .Constant = {
-            .RootParameterIndex = 1,
-            .DestOffsetIn32BitValues = 0,
-            .Num32BitValuesToSet = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4,
-         },
-      },
-      {
-         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
-      },
+      };
+   }
+
+   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
+      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
    };
 
+   assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
+
+   uint32_t exec_params_size =
+      prim_restart ?
+      sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
+      sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
+
    D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
-      .ByteStride = sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params),
-      .NumArgumentDescs = ARRAY_SIZE(cmd_args),
+      .ByteStride = exec_params_size,
+      .NumArgumentDescs = cmd_arg_count,
       .pArgumentDescs = cmd_args,
    };
 
diff --git a/src/microsoft/vulkan/dzn_nir.c b/src/microsoft/vulkan/dzn_nir.c
index fcae26f45b7..518c37c5c96 100644
--- a/src/microsoft/vulkan/dzn_nir.c
+++ b/src/microsoft/vulkan/dzn_nir.c
@@ -93,6 +93,8 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
       "draw_count_triangle_fan",
       "indexed_draw_triangle_fan",
       "indexed_draw_count_triangle_fan",
+      "indexed_draw_triangle_fan_prim_restart",
+      "indexed_draw_count_triangle_fan_prim_restart",
    };
 
    assert(type < ARRAY_SIZE(type_str));
@@ -100,15 +102,22 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
    bool indexed = type == DZN_INDIRECT_INDEXED_DRAW ||
                   type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
                   type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
-                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
+                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
+                  type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
+                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
    bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
                        type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
                        type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
-                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
+                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
+                       type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
+                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
    bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT ||
                          type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
                          type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
-                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
+                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
+                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
+   bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
+                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
    nir_builder b =
       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                      dxil_get_nir_compiler_options(),
@@ -130,10 +139,11 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
    nir_ssa_def *exec_buf_desc =
       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);
 
-   unsigned params_size =
-      triangle_fan ?
-      sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
-      sizeof(struct dzn_indirect_draw_rewrite_params);
+   unsigned params_size;
+   if (triangle_fan)
+      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
+   else
+      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
 
    nir_ssa_def *params =
       nir_load_ubo(&b, params_size / 4, 32,
@@ -209,35 +219,64 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
          nir_iadd(&b, nir_channel(&b, params, 2),
                   nir_imul(&b, triangle_fan_index_buf_stride, index));
 
+      nir_ssa_def *triangle_fan_exec_vals[9] = { 0 };
+      uint32_t triangle_fan_exec_param_count = 0;
       nir_ssa_def *addr_lo_overflow =
          nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
       nir_ssa_def *triangle_fan_index_buf_addr_hi =
          nir_iadd(&b, nir_channel(&b, params, 3),
                   nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
 
-      nir_ssa_def *triangle_fan_exec_vals[] = {
-         triangle_fan_index_buf_addr_lo,
-         triangle_fan_index_buf_addr_hi,
-         indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0),
-         triangle_count,
-         nir_imm_int(&b, 1),
-         nir_imm_int(&b, 1),
-      };
-
-      assert(sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params) == (ARRAY_SIZE(triangle_fan_exec_vals) * 4));
+      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo;
+      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi;
+
+      if (prim_restart) {
+         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2);
+         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0);
+	 uint32_t index_count_offset =
+            offsetof(struct dzn_indirect_triangle_fan_draw_exec_params, indexed_draw.index_count);
+	 nir_ssa_def *exec_buf_start =
+            nir_load_ubo(&b, 2, 32,
+                         params_desc, nir_imm_int(&b, 16),
+                         .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
+         nir_ssa_def *exec_buf_start_lo =
+            nir_iadd(&b, nir_imm_int(&b, index_count_offset),
+                     nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
+                              nir_imul(&b, exec_stride, index)));
+	 addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0));
+         nir_ssa_def *exec_buf_start_hi =
+            nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
+                     nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
+         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo;
+         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi;
+         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
+      } else {
+         triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
+            indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0);
+         triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
+            triangle_count;
+      }
+      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
+      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
 
+      unsigned rewrite_index_exec_params =
+         prim_restart ?
+         sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
+         sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
       nir_ssa_def *triangle_fan_exec_stride =
-         nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params));
+         nir_imm_int(&b, rewrite_index_exec_params);
       nir_ssa_def *triangle_fan_exec_offset =
          nir_imul(&b, triangle_fan_exec_stride, index);
 
-      nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[0], 4),
-                     triangle_fan_exec_buf_desc, triangle_fan_exec_offset,
-                     .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 4);
-      nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[4], 2),
-                     triangle_fan_exec_buf_desc,
-                     nir_iadd_imm(&b, triangle_fan_exec_offset, 16),
-                     .write_mask = 0x3, .access = ACCESS_NON_READABLE, .align_mul = 4);
+      for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) {
+         unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4);
+         uint32_t mask = (1 << comps) - 1;
+
+         nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps),
+                        triangle_fan_exec_buf_desc,
+                        nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4),
+                        .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4);
+      }
 
       nir_ssa_def *ibview_vals[] = {
          triangle_fan_index_buf_addr_lo,
@@ -271,6 +310,172 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
    return b.shader;
 }
 
+nir_shader *
+dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
+{
+   assert(old_index_size == 2 || old_index_size == 4);
+
+   nir_builder b =
+      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
+                                     dxil_get_nir_compiler_options(),
+                                     "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)",
+                                     old_index_size);
+   b.shader->info.internal = true;
+
+   nir_ssa_def *params_desc =
+      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
+   nir_ssa_def *new_index_buf_desc =
+      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
+                             "new_index_buf", ACCESS_NON_READABLE);
+   nir_ssa_def *old_index_buf_desc =
+      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
+                             "old_index_buf", ACCESS_NON_WRITEABLE);
+   nir_ssa_def *new_index_count_ptr_desc =
+      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
+                             "new_index_count_ptr", ACCESS_NON_READABLE);
+
+   nir_ssa_def *params =
+      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32,
+                   params_desc, nir_imm_int(&b, 0),
+                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
+
+   nir_ssa_def *prim_restart_val =
+      nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff);
+   nir_variable *old_index_ptr_var =
+      nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var");
+   nir_ssa_def *old_index_ptr = nir_channel(&b, params, 0);
+   nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1);
+   nir_variable *new_index_ptr_var =
+      nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var");
+   nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1);
+   nir_ssa_def *old_index_count = nir_channel(&b, params, 1);
+   nir_variable *index0_var =
+      nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var");
+   nir_store_var(&b, index0_var, prim_restart_val, 1);
+
+   /*
+    * Filter out all primitive-restart magic values, and generate a triangle list
+    * from the triangle fan definition.
+    *
+    * Basically:
+    *
+    * new_index_ptr = 0;
+    * index0 = restart_prim_value; // 0xffff or 0xffffffff
+    * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) {
+    *    // If we have no starting-point we need at least 3 vertices,
+    *    // otherwise we can do with two. If there's not enough vertices
+    *    // to form a primitive, we just bail out.
+    *    min_indices = index0 == restart_prim_value ? 3 : 2;
+    *    if (old_index_ptr + min_indices > firstIndex + indexCount)
+    *       break;
+    *
+    *    if (index0 == restart_prim_value) {
+    *       // No starting point, skip all entries until we have a
+    *       // non-primitive-restart value
+    *       index0 = old_index_buf[old_index_ptr++];
+    *       continue;
+    *    }
+    *
+    *    // If at least one index contains the primitive-restart pattern,
+    *    // ignore this triangle, and skip the unused entries
+    *    if (old_index_buf[old_index_ptr + 1] == restart_prim_value) {
+    *       old_index_ptr += 2; index0 = restart_prim_value;
+    *       continue;
+    *    }
+    *    if (old_index_buf[old_index_ptr] == restart_prim_value) {
+    *       old_index_ptr++; index0 = restart_prim_value;
+    *       continue;
+    *    }
+    *
+    *    // We have a valid primitive, queue it to the new index buffer
+    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr++];
+    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr];
+    *    new_index_buf[new_index_ptr++] = index0;
+    * }
+    *
+    * expressed in NIR, which admittedly is not super easy to grasp.
+    * TODO: Might be a good thing to use the CL compiler we have and turn
+    * those shaders into CL kernels.
+    */
+   nir_push_loop(&b);
+
+   old_index_ptr = nir_load_var(&b, old_index_ptr_var);
+   nir_ssa_def *index0 = nir_load_var(&b, index0_var);
+
+   nir_ssa_def *read_index_count =
+      nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val),
+                nir_imm_int(&b, 3), nir_imm_int(&b, 2));
+   nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count)));
+   nir_jump(&b, nir_jump_break);
+   nir_pop_if(&b, NULL);
+
+   nir_ssa_def *old_index_offset =
+      nir_imul_imm(&b, old_index_ptr, old_index_size);
+
+   nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val));
+   nir_ssa_def *index_val =
+      nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
+                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
+                    .align_mul = 4);
+   if (old_index_size == 2) {
+      index_val = nir_bcsel(&b,
+                            nir_ieq_imm(&b, nir_iand_imm(&b, old_index_offset, 0x2), 0),
+                            nir_iand_imm(&b, index_val, 0xffff),
+                            nir_ushr_imm(&b, index_val, 16));
+   }
+
+   nir_store_var(&b, index0_var, index_val, 1);
+   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
+   nir_jump(&b, nir_jump_continue);
+   nir_pop_if(&b, NULL);
+
+   nir_ssa_def *index12 =
+      nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
+                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
+                    .align_mul = 4);
+   if (old_index_size == 2) {
+      nir_ssa_def *indices[] = {
+         nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff),
+         nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16),
+         nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff),
+      };
+
+      index12 =
+         nir_bcsel(&b,
+                   nir_ieq_imm(&b, nir_iand_imm(&b, old_index_offset, 0x2), 0),
+                   nir_vec2(&b, indices[0], indices[1]),
+                   nir_vec2(&b, indices[1], indices[2]));
+   }
+
+   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val));
+   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1);
+   nir_store_var(&b, index0_var, prim_restart_val, 1);
+   nir_jump(&b, nir_jump_continue);
+   nir_push_else(&b, NULL);
+   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
+   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val));
+   nir_store_var(&b, index0_var, prim_restart_val, 1);
+   nir_jump(&b, nir_jump_continue);
+   nir_push_else(&b, NULL);
+   nir_ssa_def *new_indices =
+      nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0);
+   nir_ssa_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var);
+   nir_ssa_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t));
+   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
+                  new_index_offset,
+                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
+   nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1);
+   nir_pop_if(&b, NULL);
+   nir_pop_if(&b, NULL);
+   nir_pop_loop(&b, NULL);
+
+   nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var),
+                  new_index_count_ptr_desc, nir_imm_int(&b, 0),
+                  .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);
+
+   return b.shader;
+}
+
 nir_shader *
 dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
 {
diff --git a/src/microsoft/vulkan/dzn_nir.h b/src/microsoft/vulkan/dzn_nir.h
index 9492764fbb1..a6e4383121b 100644
--- a/src/microsoft/vulkan/dzn_nir.h
+++ b/src/microsoft/vulkan/dzn_nir.h
@@ -56,6 +56,13 @@ struct dzn_indirect_draw_triangle_fan_rewrite_params {
    uint64_t triangle_fan_index_buf_start;
 };
 
+struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params {
+   uint32_t draw_buf_stride;
+   uint32_t triangle_fan_index_buf_stride;
+   uint64_t triangle_fan_index_buf_start;
+   uint64_t exec_buf_start;
+};
+
 struct dzn_indirect_draw_exec_params {
    struct {
       uint32_t first_vertex;
@@ -85,6 +92,11 @@ struct dzn_triangle_fan_rewrite_index_params {
    uint32_t first_index;
 };
 
+struct dzn_triangle_fan_prim_restart_rewrite_index_params {
+   uint32_t first_index;
+   uint32_t index_count;
+};
+
 struct dzn_indirect_triangle_fan_rewrite_index_exec_params {
    uint64_t new_index_buf;
    struct dzn_triangle_fan_rewrite_index_params params;
@@ -93,6 +105,15 @@ struct dzn_indirect_triangle_fan_rewrite_index_exec_params {
    } group_count;
 };
 
+struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params {
+   uint64_t new_index_buf;
+   struct dzn_triangle_fan_prim_restart_rewrite_index_params params;
+   uint64_t index_count_ptr;
+   struct {
+      uint32_t x, y, z;
+   } group_count;
+};
+
 enum dzn_indirect_draw_type {
    DZN_INDIRECT_DRAW,
    DZN_INDIRECT_DRAW_COUNT,
@@ -102,6 +123,8 @@ enum dzn_indirect_draw_type {
    DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN,
    DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN,
    DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN,
+   DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART,
+   DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART,
    DZN_NUM_INDIRECT_DRAW_TYPES,
 };
 
@@ -111,6 +134,9 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type);
 nir_shader *
 dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size);
 
+nir_shader *
+dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size);
+
 struct dzn_nir_blit_info {
    union {
       struct {
diff --git a/src/microsoft/vulkan/dzn_private.h b/src/microsoft/vulkan/dzn_private.h
index 4a91484169d..1093048028e 100644
--- a/src/microsoft/vulkan/dzn_private.h
+++ b/src/microsoft/vulkan/dzn_private.h
@@ -84,6 +84,8 @@ enum dzn_index_type {
    DZN_NO_INDEX,
    DZN_INDEX_2B,
    DZN_INDEX_4B,
+   DZN_INDEX_2B_WITH_PRIM_RESTART,
+   DZN_INDEX_4B_WITH_PRIM_RESTART,
    DZN_NUM_INDEX_TYPE,
 };
 
@@ -99,12 +101,14 @@ dzn_index_type_from_size(uint8_t index_size)
 }
 
 static inline enum dzn_index_type
-dzn_index_type_from_dxgi_format(DXGI_FORMAT format)
+dzn_index_type_from_dxgi_format(DXGI_FORMAT format, bool prim_restart)
 {
    switch (format) {
    case DXGI_FORMAT_UNKNOWN: return DZN_NO_INDEX;
-   case DXGI_FORMAT_R16_UINT: return DZN_INDEX_2B;
-   case DXGI_FORMAT_R32_UINT: return DZN_INDEX_4B;
+   case DXGI_FORMAT_R16_UINT:
+      return prim_restart ? DZN_INDEX_2B_WITH_PRIM_RESTART : DZN_INDEX_2B;
+   case DXGI_FORMAT_R32_UINT:
+      return prim_restart ? DZN_INDEX_4B_WITH_PRIM_RESTART : DZN_INDEX_4B;
    default: unreachable("Invalid index format");
    }
 }
@@ -113,9 +117,14 @@ static inline uint8_t
 dzn_index_size(enum dzn_index_type type)
 {
    switch (type) {
-   case DZN_NO_INDEX: return 0;
-   case DZN_INDEX_2B: return 2;
-   case DZN_INDEX_4B: return 4;
+   case DZN_NO_INDEX:
+      return 0;
+   case DZN_INDEX_2B_WITH_PRIM_RESTART:
+   case DZN_INDEX_2B:
+      return 2;
+   case DZN_INDEX_4B_WITH_PRIM_RESTART:
+   case DZN_INDEX_4B:
+      return 4;
    default: unreachable("Invalid index type");
    }
 }



More information about the mesa-commit mailing list