[Mesa-dev] [PATCH v4 4/9] anv: Added support for non-dynamic sample locations on Gen8+
Jason Ekstrand
jason at jlekstrand.net
Thu Mar 14 22:09:55 UTC 2019
On Thu, Mar 14, 2019 at 2:52 PM Eleni Maria Stea <estea at igalia.com> wrote:
> Allowing the user to set custom sample locations non-dynamically, by
> filling the extension structs and chaining them to the pipeline structs
> according to the Vulkan specification section [26.5. Custom Sample
> Locations] for the following structures:
>
> 'VkPipelineSampleLocationsStateCreateInfoEXT'
> 'VkSampleLocationsInfoEXT'
> 'VkSampleLocationEXT'
>
> Once custom locations are used, the default locations are lost and need
> to be re-emitted again in the next pipeline creation. For that, we emit
> the 3DSTATE_SAMPLE_PATTERN at every pipeline creation.
>
> v2: In v1, we used the custom anv_sample struct to store the location
> and the distance from the pixel center because we would then use
> this distance to sort the locations and send them in increasing
> monotonical order to the GPU. That was because the Skylake PRM Vol.
> 2a "3DSTATE_SAMPLE_PATTERN" says that the samples must have
> monotonically increasing distance from the pixel center to get the
> correct centroid computation in the device. However, the Vulkan
> spec seems to require that the samples occur in the order provided
> through the API and this requirement is only for the standard
> locations. As long as this only affects centroid calculations as
> the docs say, we should be ok because OpenGL and Vulkan only
> require that the centroid be some lit sample and that it's the same
> for all samples in a pixel; they have no requirement that it be the
> one closest to center. (Jason Ekstrand)
> For that we made the following changes:
> 1- We removed the custom structs and functions from anv_private.h
> and anv_sample_locations.h and anv_sample_locations.c (the last
> two files were removed). (Jason Ekstrand)
> 2- We modified the macros used to take also the array as parameter
> and we renamed them to start by GEN_. (Jason Ekstrand)
> 3- We don't sort the samples anymore. (Jason Ekstrand)
> ---
> src/intel/common/gen_sample_positions.h | 57 ++++++++++++++++++
> src/intel/vulkan/anv_genX.h | 5 ++
> src/intel/vulkan/anv_private.h | 1 +
> src/intel/vulkan/genX_pipeline.c | 79 +++++++++++++++++++++----
> src/intel/vulkan/genX_state.c | 72 ++++++++++++++++++++++
> 5 files changed, 201 insertions(+), 13 deletions(-)
>
> diff --git a/src/intel/common/gen_sample_positions.h
> b/src/intel/common/gen_sample_positions.h
> index da48dcb5ed0..850661931cf 100644
> --- a/src/intel/common/gen_sample_positions.h
> +++ b/src/intel/common/gen_sample_positions.h
> @@ -160,4 +160,61 @@ prefix##14YOffset = 0.9375; \
> prefix##15XOffset = 0.0625; \
> prefix##15YOffset = 0.0000;
>
> +/* Examples:
> + * in case of GEN_GEN < 8:
> + * GEN_SAMPLE_POS_ELEM(ms.Sample, info->pSampleLocations, 0); expands to:
> + * ms.Sample0XOffset = info->pSampleLocations[0].pos.x;
> + * ms.Sample0YOffset = info->pSampleLocations[0].y;
> + *
> + * in case of GEN_GEN >= 8:
> + * GEN_SAMPLE_POS_ELEM(sp._16xSample, info->pSampleLocations, 0); expands
> to:
> + * sp._16xSample0XOffset = info->pSampleLocations[0].x;
> + * sp._16xSample0YOffset = info->pSampleLocations[0].y;
> + */
> +
> +#define GEN_SAMPLE_POS_ELEM(prefix, arr, sample_idx) \
> +prefix##sample_idx##XOffset = arr[sample_idx].x; \
> +prefix##sample_idx##YOffset = arr[sample_idx].y;
> +
> +#define GEN_SAMPLE_POS_1X_ARRAY(prefix, arr)\
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 0);
> +
> +#define GEN_SAMPLE_POS_2X_ARRAY(prefix, arr) \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 1);
> +
> +#define GEN_SAMPLE_POS_4X_ARRAY(prefix, arr) \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 3);
> +
> +#define GEN_SAMPLE_POS_8X_ARRAY(prefix, arr) \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 3); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 4); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 5); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 6); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 7);
> +
> +#define GEN_SAMPLE_POS_16X_ARRAY(prefix, arr) \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 3); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 4); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 5); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 6); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 7); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 8); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 9); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 10); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 11); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 12); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 13); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 14); \
> +GEN_SAMPLE_POS_ELEM(prefix, arr, 15);
> +
> #endif /* GEN_SAMPLE_POSITIONS_H */
> diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> index 8fd32cabf1e..fb7419b6347 100644
> --- a/src/intel/vulkan/anv_genX.h
> +++ b/src/intel/vulkan/anv_genX.h
> @@ -88,3 +88,8 @@ void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer
> *cmd_buffer,
>
> void genX(blorp_exec)(struct blorp_batch *batch,
> const struct blorp_params *params);
> +
> +void genX(emit_sample_locations)(struct anv_batch *batch,
> + const VkSampleLocationEXT *sl,
> + uint32_t num_samples,
> + bool custom_locations);
>
You probably don't need the "custom_locations bool. You could just use sl
== NULL or num_samples == 0 for that.
> diff --git a/src/intel/vulkan/anv_private.h
> b/src/intel/vulkan/anv_private.h
> index eed282ff985..a39195733cd 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -165,6 +165,7 @@ struct gen_l3_config;
> #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
> #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
> #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
> +#define MAX_SAMPLE_LOCATIONS 16
>
> /* The kernel relocation API has a limitation of a 32-bit delta value
> * applied to the address before it is written which, in spite of it being
> diff --git a/src/intel/vulkan/genX_pipeline.c
> b/src/intel/vulkan/genX_pipeline.c
> index 975052deb79..07b45db1988 100644
> --- a/src/intel/vulkan/genX_pipeline.c
> +++ b/src/intel/vulkan/genX_pipeline.c
> @@ -548,12 +548,9 @@ emit_rs_state(struct anv_pipeline *pipeline,
> }
>
> static void
> -emit_ms_state(struct anv_pipeline *pipeline,
> - const VkPipelineMultisampleStateCreateInfo *info)
> +emit_sample_mask(struct anv_pipeline *pipeline,
> + const VkPipelineMultisampleStateCreateInfo *info)
> {
> - uint32_t samples = 1;
> - uint32_t log2_samples = 0;
> -
> /* From the Vulkan 1.0 spec:
> * If pSampleMask is NULL, it is treated as if the mask has all bits
> * enabled, i.e. no coverage is removed from fragments.
> @@ -566,14 +563,19 @@ emit_ms_state(struct anv_pipeline *pipeline,
> uint32_t sample_mask = 0xff;
> #endif
>
> - if (info) {
> - samples = info->rasterizationSamples;
> - log2_samples = __builtin_ffs(samples) - 1;
> - }
> -
> if (info && info->pSampleMask)
> sample_mask &= info->pSampleMask[0];
>
> + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
> + sm.SampleMask = sample_mask;
> + }
> +}
> +
> +static void
> +emit_multisample(struct anv_pipeline *pipeline,
> + uint32_t samples,
> + uint32_t log2_samples)
> +{
>
Is there some particular reason why you're breaking emit_ms_state up into
multiple functions? It seems unrelated to enabling this feature and it
makes this patch very hard to read.
> anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
> ms.NumberofMultisamples = log2_samples;
>
> @@ -605,10 +607,61 @@ emit_ms_state(struct anv_pipeline *pipeline,
> }
> #endif
> }
> +}
>
> - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
> - sm.SampleMask = sample_mask;
> +static void
> +emit_ms_state(struct anv_pipeline *pipeline,
> + const VkPipelineMultisampleStateCreateInfo *info,
> + const VkPipelineDynamicStateCreateInfo *dinfo)
> +{
> +#if GEN_GEN >= 8
> + VkSampleLocationsInfoEXT *sl;
> + bool custom_locations = false;
> +#endif
> +
> + uint32_t samples = 1;
> + uint32_t log2_samples = 0;
> +
> + emit_sample_mask(pipeline, info);
> +
> + if (info) {
> + samples = info->rasterizationSamples;
> +
> +#if GEN_GEN >= 8
> + if (info->pNext) {
> + VkPipelineSampleLocationsStateCreateInfoEXT *slinfo =
> + (VkPipelineSampleLocationsStateCreateInfoEXT *)info->pNext;
> +
> + if (slinfo &&
> + (slinfo->sType ==
> +
> VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT)) {
> + if (slinfo->sampleLocationsEnable == VK_TRUE) {
> + uint32_t i;
> +
> + for (i = 0; i < dinfo->dynamicStateCount; i++) {
> + if (dinfo->pDynamicStates[i] ==
> + VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT)
> + return;
> + }
> +
> + if ((sl = &slinfo->sampleLocationsInfo))
> + samples = sl->sampleLocationsPerPixel;
>
You should be able to assert that these are equal
> +
> + custom_locations = true;
> + }
> + }
> + }
>
There are two ways to do this both of which are easier and less complicated
than what you have here:
1. vk_foreach_struct() and a switch statement like we do for the
Properties2 queries
2. vk_find_struct_const() to just look for the one struct.
> +#endif
> +
> + log2_samples = __builtin_ffs(samples) - 1;
> }
> +
> + emit_multisample(pipeline, samples, log2_samples);
> +
> +#if GEN_GEN >= 8
> + genX(emit_sample_locations)(&pipeline->batch, sl->pSampleLocations,
> + samples, custom_locations);
> +#endif
> }
>
> static const uint32_t vk_to_gen_logic_op[] = {
> @@ -1938,7 +1991,7 @@ genX(graphics_pipeline_create)(
> assert(pCreateInfo->pRasterizationState);
> emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
> pCreateInfo->pMultisampleState, pass, subpass);
> - emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
> + emit_ms_state(pipeline, pCreateInfo->pMultisampleState,
> pCreateInfo->pDynamicState);
> emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass,
> subpass);
> emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
> pCreateInfo->pMultisampleState);
> diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
> index cffd1e47247..3f628710b31 100644
> --- a/src/intel/vulkan/genX_state.c
> +++ b/src/intel/vulkan/genX_state.c
> @@ -435,3 +435,75 @@ VkResult genX(CreateSampler)(
>
> return VK_SUCCESS;
> }
> +
> +void
> +genX(emit_sample_locations)(struct anv_batch *batch,
> + const VkSampleLocationEXT *sl,
> + uint32_t num_samples,
> + bool custom_locations)
> +{
> + /* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says:
> + * "When programming the sample offsets (for NUMSAMPLES_4 or _8 and
> + * MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7 for
> 8X,
> + * or 15 for 16X) must have monotonically increasing distance from the
> + * pixel center. This is required to get the correct centroid
> computation
> + * in the device."
> + *
> + * However, the Vulkan spec seems to require that the the samples
> occur in
> + * the order provided through the API. The standard sample patterns
> have
> + * the above property that they have monotonically increasing distances
> + * from the center but client-provided ones do not. As long as this
> only
> + * affects centroid calculations as the docs say, we should be ok
> because
> + * OpenGL and Vulkan only require that the centroid be some lit sample
> and
> + * that it's the same for all samples in a pixel; they have no
> requirement
> + * that it be the one closest to center.
> + */
> +
> +#if GEN_GEN >= 8
> +
> +#if GEN_GEN == 10
> + gen10_emit_wa_cs_stall_flush(batch);
> +#endif
> +
> + if (custom_locations) {
> + anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) {
> + switch (num_samples) {
> + case 1:
> + GEN_SAMPLE_POS_1X_ARRAY(sp._1xSample, sl);
> + break;
> + case 2:
> + GEN_SAMPLE_POS_2X_ARRAY(sp._2xSample, sl);
> + break;
> + case 4:
> + GEN_SAMPLE_POS_4X_ARRAY(sp._4xSample, sl);
> + break;
> + case 8:
> + GEN_SAMPLE_POS_8X_ARRAY(sp._8xSample, sl);
> + break;
> +#if GEN_GEN >= 9
> + case 16:
> + GEN_SAMPLE_POS_16X_ARRAY(sp._16xSample, sl);
> + break;
> +#endif
> + default:
> + break;
> + }
> + }
> + } else {
> + anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) {
> + GEN_SAMPLE_POS_1X(sp._1xSample);
> + GEN_SAMPLE_POS_2X(sp._2xSample);
> + GEN_SAMPLE_POS_4X(sp._4xSample);
> + GEN_SAMPLE_POS_8X(sp._8xSample);
> +#if GEN_GEN >= 9
> + GEN_SAMPLE_POS_16X(sp._16xSample);
> +#endif
> + }
> + }
> +
> +#if GEN_GEN == 10
> + gen10_emit_wa_lri_to_cache_mode_zero(batch);
> +#endif
> +
> +#endif
> +}
> --
> 2.20.1
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190314/65e90ac5/attachment-0001.html>
More information about the mesa-dev
mailing list