[Mesa-dev] [PATCH] radv: export SampleMask from pixel shaders at full rate

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Wed Dec 13 20:21:41 UTC 2017


On Tue, Dec 12, 2017 at 6:08 PM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> Use 16_ABGR instead of 32_ABGR if Z isn't written.
>
> Ported from RadeonSI.
>
> No CTS regressions on Polaris.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/common/ac_nir_to_llvm.c | 65 ++++++++++++++++++++++++++++++++++-------
>  src/amd/vulkan/radv_pipeline.c  | 29 ++++++++++++++----
>  2 files changed, 78 insertions(+), 16 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 663b27d265..5916619e97 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -6166,6 +6166,26 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
>         return true;
>  }
>
> +static unsigned
> +si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
> +                          bool writes_samplemask)
> +{
> +       if (writes_z) {
> +               /* Z needs 32 bits. */
> +               if (writes_samplemask)
> +                       return V_028710_SPI_SHADER_32_ABGR;
> +               else if (writes_stencil)
> +                       return V_028710_SPI_SHADER_32_GR;
> +               else
> +                       return V_028710_SPI_SHADER_32_R;
> +       } else if (writes_stencil || writes_samplemask) {
> +               /* Both stencil and sample mask need only 16 bits. */
> +               return V_028710_SPI_SHADER_UINT16_ABGR;
> +       } else {
> +               return V_028710_SPI_SHADER_ZERO;
> +       }
> +}

I'm not a fan of having this function (si_get_spi_shader_z_format)
duplicated in two places, ac_nir_to_llvm.c and radv_pipeline.c. Can we
export the format from the compiler to radv, or the other way around?

Otherwise,

Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

> +
>  static void
>  si_export_mrt_z(struct nir_to_llvm_context *ctx,
>                 LLVMValueRef depth, LLVMValueRef stencil,
> @@ -6184,19 +6204,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
>         args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
>         args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */
>
> -       if (depth) {
> -               args.out[0] = depth;
> -               args.enabled_channels |= 0x1;
> -       }
> +       unsigned format = si_get_spi_shader_z_format(depth != NULL,
> +                                                    stencil != NULL,
> +                                                    samplemask != NULL);
> +
> +       if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
> +               assert(!depth);
> +               args.compr = 1; /* COMPR flag */
> +
> +               if (stencil) {
> +                       /* Stencil should be in X[23:16]. */
> +                       stencil = ac_to_integer(&ctx->ac, stencil);
> +                       stencil = LLVMBuildShl(ctx->builder, stencil,
> +                                              LLVMConstInt(ctx->ac.i32, 16, 0), "");
> +                       args.out[0] = ac_to_float(&ctx->ac, stencil);
> +                       args.enabled_channels |= 0x3;
> +               }
> +               if (samplemask) {
> +                       /* SampleMask should be in Y[15:0]. */
> +                       args.out[1] = samplemask;
> +                       args.enabled_channels |= 0xc;
> +               }
> +       } else {
> +               if (depth) {
> +                       args.out[0] = depth;
> +                       args.enabled_channels |= 0x1;
> +               }
>
> -       if (stencil) {
> -               args.out[1] = stencil;
> -               args.enabled_channels |= 0x2;
> -       }
> +               if (stencil) {
> +                       args.out[1] = stencil;
> +                       args.enabled_channels |= 0x2;
> +               }
>
> -       if (samplemask) {
> -               args.out[2] = samplemask;
> -               args.enabled_channels |= 0x4;
> +               if (samplemask) {
> +                       args.out[2] = samplemask;
> +                       args.enabled_channels |= 0x4;
> +               }
>         }
>
>         /* SI (except OLAND and HAINAN) has a bug that it only looks
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 0146d6935e..baaf5c4c77 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -2013,6 +2013,25 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
>         }
>  }
>
> +static unsigned
> +si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
> +                          bool writes_samplemask)
> +{
> +       if (writes_z) {
> +               /* Z needs 32 bits. */
> +               if (writes_samplemask)
> +                       return V_028710_SPI_SHADER_32_ABGR;
> +               else if (writes_stencil)
> +                       return V_028710_SPI_SHADER_32_GR;
> +               else
> +                       return V_028710_SPI_SHADER_32_R;
> +       } else if (writes_stencil || writes_samplemask) {
> +               /* Both stencil and sample mask need only 16 bits. */
> +               return V_028710_SPI_SHADER_UINT16_ABGR;
> +       } else {
> +               return V_028710_SPI_SHADER_ZERO;
> +       }
> +}
>
>  static VkResult
>  radv_pipeline_init(struct radv_pipeline *pipeline,
> @@ -2108,11 +2127,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
>         if (pipeline->device->physical_device->has_rbplus)
>                 pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
>
> -       pipeline->graphics.shader_z_format =
> -               ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR :
> -               ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
> -               ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
> -               V_028710_SPI_SHADER_ZERO;
> +       unsigned shader_z_format =
> +               si_get_spi_shader_z_format(ps->info.fs.writes_z,
> +                                          ps->info.fs.writes_stencil,
> +                                          ps->info.fs.writes_sample_mask);
> +       pipeline->graphics.shader_z_format = shader_z_format;
>
>         calculate_vgt_gs_mode(pipeline);
>         calculate_vs_outinfo(pipeline);
> --
> 2.15.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list