[Mesa-dev] [PATCH v4 4/9] anv: Added support for non-dynamic sample locations on Gen8+

Eleni Maria Stea estea at igalia.com
Thu Mar 14 19:52:02 UTC 2019


Allowing the user to set custom sample locations non-dynamically, by
filling the extension structs and chaining them to the pipeline structs
according to the Vulkan specification section [26.5. Custom Sample
Locations] for the following structures:

'VkPipelineSampleLocationsStateCreateInfoEXT'
'VkSampleLocationsInfoEXT'
'VkSampleLocationEXT'

Once custom locations are used, the default locations are lost and need
to be re-emitted again in the next pipeline creation. For that, we emit
the 3DSTATE_SAMPLE_PATTERN at every pipeline creation.

v2: In v1, we used the custom anv_sample struct to store the location
    and the distance from the pixel center because we would then use
    this distance to sort the locations and send them in increasing
    monotonical order to the GPU. That was because the Skylake PRM Vol.
    2a "3DSTATE_SAMPLE_PATTERN" says that the samples must have
    monotonically increasing distance from the pixel center to get the
    correct centroid computation in the device. However, the Vulkan
    spec seems to require that the samples occur in the order provided
    through the API and this requirement is only for the standard
    locations. As long as this only affects centroid calculations as
    the docs say, we should be ok because OpenGL and Vulkan only
    require that the centroid be some lit sample and that it's the same
    for all samples in a pixel; they have no requirement that it be the
    one closest to center. (Jason Ekstrand)
    For that we made the following changes:
    1- We removed the custom structs and functions from anv_private.h
       and anv_sample_locations.h and anv_sample_locations.c (the last
       two files were removed). (Jason Ekstrand)
    2- We modified the macros used to take also the array as parameter
       and we renamed them to start by GEN_. (Jason Ekstrand)
    3- We don't sort the samples anymore. (Jason Ekstrand)
---
 src/intel/common/gen_sample_positions.h | 57 ++++++++++++++++++
 src/intel/vulkan/anv_genX.h             |  5 ++
 src/intel/vulkan/anv_private.h          |  1 +
 src/intel/vulkan/genX_pipeline.c        | 79 +++++++++++++++++++++----
 src/intel/vulkan/genX_state.c           | 72 ++++++++++++++++++++++
 5 files changed, 201 insertions(+), 13 deletions(-)

diff --git a/src/intel/common/gen_sample_positions.h b/src/intel/common/gen_sample_positions.h
index da48dcb5ed0..850661931cf 100644
--- a/src/intel/common/gen_sample_positions.h
+++ b/src/intel/common/gen_sample_positions.h
@@ -160,4 +160,61 @@ prefix##14YOffset  = 0.9375; \
 prefix##15XOffset  = 0.0625; \
 prefix##15YOffset  = 0.0000;
 
+/* Examples:
+ * in case of GEN_GEN < 8:
+ * GEN_SAMPLE_POS_ELEM(ms.Sample, info->pSampleLocations, 0); expands to:
+ *    ms.Sample0XOffset = info->pSampleLocations[0].pos.x;
+ *    ms.Sample0YOffset = info->pSampleLocations[0].y;
+ *
+ * in case of GEN_GEN >= 8:
+ * GEN_SAMPLE_POS_ELEM(sp._16xSample, info->pSampleLocations, 0); expands to:
+ *    sp._16xSample0XOffset = info->pSampleLocations[0].x;
+ *    sp._16xSample0YOffset = info->pSampleLocations[0].y;
+ */
+
+#define GEN_SAMPLE_POS_ELEM(prefix, arr, sample_idx) \
+prefix##sample_idx##XOffset = arr[sample_idx].x; \
+prefix##sample_idx##YOffset = arr[sample_idx].y;
+
+#define GEN_SAMPLE_POS_1X_ARRAY(prefix, arr)\
+GEN_SAMPLE_POS_ELEM(prefix, arr, 0);
+
+#define GEN_SAMPLE_POS_2X_ARRAY(prefix, arr) \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 1);
+
+#define GEN_SAMPLE_POS_4X_ARRAY(prefix, arr) \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 3);
+
+#define GEN_SAMPLE_POS_8X_ARRAY(prefix, arr) \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 3); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 4); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 5); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 6); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 7);
+
+#define GEN_SAMPLE_POS_16X_ARRAY(prefix, arr) \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 3); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 4); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 5); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 6); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 7); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 8); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 9); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 10); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 11); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 12); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 13); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 14); \
+GEN_SAMPLE_POS_ELEM(prefix, arr, 15);
+
 #endif /* GEN_SAMPLE_POSITIONS_H */
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 8fd32cabf1e..fb7419b6347 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -88,3 +88,8 @@ void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
 
 void genX(blorp_exec)(struct blorp_batch *batch,
                       const struct blorp_params *params);
+
+void genX(emit_sample_locations)(struct anv_batch *batch,
+                                 const VkSampleLocationEXT *sl,
+                                 uint32_t num_samples,
+                                 bool custom_locations);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index eed282ff985..a39195733cd 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -165,6 +165,7 @@ struct gen_l3_config;
 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
+#define MAX_SAMPLE_LOCATIONS 16
 
 /* The kernel relocation API has a limitation of a 32-bit delta value
  * applied to the address before it is written which, in spite of it being
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 975052deb79..07b45db1988 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -548,12 +548,9 @@ emit_rs_state(struct anv_pipeline *pipeline,
 }
 
 static void
-emit_ms_state(struct anv_pipeline *pipeline,
-              const VkPipelineMultisampleStateCreateInfo *info)
+emit_sample_mask(struct anv_pipeline *pipeline,
+                 const VkPipelineMultisampleStateCreateInfo *info)
 {
-   uint32_t samples = 1;
-   uint32_t log2_samples = 0;
-
    /* From the Vulkan 1.0 spec:
     *    If pSampleMask is NULL, it is treated as if the mask has all bits
     *    enabled, i.e. no coverage is removed from fragments.
@@ -566,14 +563,19 @@ emit_ms_state(struct anv_pipeline *pipeline,
    uint32_t sample_mask = 0xff;
 #endif
 
-   if (info) {
-      samples = info->rasterizationSamples;
-      log2_samples = __builtin_ffs(samples) - 1;
-   }
-
    if (info && info->pSampleMask)
       sample_mask &= info->pSampleMask[0];
 
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
+      sm.SampleMask = sample_mask;
+   }
+}
+
+static void
+emit_multisample(struct anv_pipeline *pipeline,
+                 uint32_t samples,
+                 uint32_t log2_samples)
+{
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
       ms.NumberofMultisamples       = log2_samples;
 
@@ -605,10 +607,61 @@ emit_ms_state(struct anv_pipeline *pipeline,
       }
 #endif
    }
+}
 
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
-      sm.SampleMask = sample_mask;
+static void
+emit_ms_state(struct anv_pipeline *pipeline,
+              const VkPipelineMultisampleStateCreateInfo *info,
+              const VkPipelineDynamicStateCreateInfo *dinfo)
+{
+#if GEN_GEN >= 8
+   VkSampleLocationsInfoEXT *sl;
+   bool custom_locations = false;
+#endif
+
+   uint32_t samples = 1;
+   uint32_t log2_samples = 0;
+
+   emit_sample_mask(pipeline, info);
+
+   if (info) {
+      samples = info->rasterizationSamples;
+
+#if GEN_GEN >= 8
+      if (info->pNext) {
+         VkPipelineSampleLocationsStateCreateInfoEXT *slinfo =
+            (VkPipelineSampleLocationsStateCreateInfoEXT *)info->pNext;
+
+         if (slinfo &&
+               (slinfo->sType ==
+                VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT)) {
+            if (slinfo->sampleLocationsEnable == VK_TRUE) {
+               uint32_t i;
+
+               for (i = 0; i < dinfo->dynamicStateCount; i++) {
+                  if (dinfo->pDynamicStates[i] ==
+                      VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT)
+                     return;
+               }
+
+               if ((sl = &slinfo->sampleLocationsInfo))
+                  samples = sl->sampleLocationsPerPixel;
+
+               custom_locations = true;
+            }
+         }
+      }
+#endif
+
+      log2_samples = __builtin_ffs(samples) - 1;
    }
+
+   emit_multisample(pipeline, samples, log2_samples);
+
+#if GEN_GEN >= 8
+   genX(emit_sample_locations)(&pipeline->batch, sl->pSampleLocations,
+                               samples, custom_locations);
+#endif
 }
 
 static const uint32_t vk_to_gen_logic_op[] = {
@@ -1938,7 +1991,7 @@ genX(graphics_pipeline_create)(
    assert(pCreateInfo->pRasterizationState);
    emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
                  pCreateInfo->pMultisampleState, pass, subpass);
-   emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
+   emit_ms_state(pipeline, pCreateInfo->pMultisampleState, pCreateInfo->pDynamicState);
    emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
    emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
                            pCreateInfo->pMultisampleState);
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index cffd1e47247..3f628710b31 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -435,3 +435,75 @@ VkResult genX(CreateSampler)(
 
    return VK_SUCCESS;
 }
+
+void
+genX(emit_sample_locations)(struct anv_batch *batch,
+                            const VkSampleLocationEXT *sl,
+                            uint32_t num_samples,
+                            bool custom_locations)
+{
+   /* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says:
+    * "When programming the sample offsets (for NUMSAMPLES_4 or _8 and
+    * MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7 for 8X,
+    * or 15 for 16X) must have monotonically increasing distance from the
+    * pixel center. This is required to get the correct centroid computation
+    * in the device."
+    *
+    * However, the Vulkan spec seems to require that the the samples occur in
+    * the order provided through the API. The standard sample patterns have
+    * the above property that they have monotonically increasing distances
+    * from the center but client-provided ones do not. As long as this only
+    * affects centroid calculations as the docs say, we should be ok because
+    * OpenGL and Vulkan only require that the centroid be some lit sample and
+    * that it's the same for all samples in a pixel; they have no requirement
+    * that it be the one closest to center.
+    */
+
+#if GEN_GEN >= 8
+
+#if GEN_GEN == 10
+   gen10_emit_wa_cs_stall_flush(batch);
+#endif
+
+   if (custom_locations) {
+      anv_batch_emit(batch,  GENX(3DSTATE_SAMPLE_PATTERN), sp) {
+         switch (num_samples) {
+         case 1:
+            GEN_SAMPLE_POS_1X_ARRAY(sp._1xSample, sl);
+            break;
+         case 2:
+            GEN_SAMPLE_POS_2X_ARRAY(sp._2xSample, sl);
+            break;
+         case 4:
+            GEN_SAMPLE_POS_4X_ARRAY(sp._4xSample, sl);
+            break;
+         case 8:
+            GEN_SAMPLE_POS_8X_ARRAY(sp._8xSample, sl);
+            break;
+#if GEN_GEN >= 9
+         case 16:
+            GEN_SAMPLE_POS_16X_ARRAY(sp._16xSample, sl);
+            break;
+#endif
+         default:
+            break;
+         }
+      }
+   } else {
+      anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) {
+         GEN_SAMPLE_POS_1X(sp._1xSample);
+         GEN_SAMPLE_POS_2X(sp._2xSample);
+         GEN_SAMPLE_POS_4X(sp._4xSample);
+         GEN_SAMPLE_POS_8X(sp._8xSample);
+#if GEN_GEN >= 9
+         GEN_SAMPLE_POS_16X(sp._16xSample);
+#endif
+      }
+   }
+
+#if GEN_GEN == 10
+   gen10_emit_wa_lri_to_cache_mode_zero(batch);
+#endif
+
+#endif
+}
-- 
2.20.1



More information about the mesa-dev mailing list