[Mesa-dev] [PATCH 6/7] radeonsi: use all SPI color formats
Axel Davy
axel.davy at ens.fr
Tue Jan 19 08:20:05 PST 2016
On 19/01/2016 17:11, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> because not using SPI_SHADER_32_ABGR doubles fill rate.
>
> We should also get optimal performance if alpha isn't needed or blending
> isn't enabled.
> ---
> src/gallium/drivers/radeon/r600_pipe_common.h | 6 +-
> src/gallium/drivers/radeonsi/si_blit.c | 8 +
> src/gallium/drivers/radeonsi/si_pipe.h | 4 +
> src/gallium/drivers/radeonsi/si_state.c | 207 +++++++++++++++++-------
> src/gallium/drivers/radeonsi/si_state.h | 5 +
> src/gallium/drivers/radeonsi/si_state_shaders.c | 23 ++-
> 6 files changed, 195 insertions(+), 58 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index f3271e2..d66e74f 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -236,6 +236,7 @@ struct r600_surface {
> /* Misc. color flags. */
> bool alphatest_bypass;
> bool export_16bpc;
> + bool color_is_int8;
>
> /* Color registers. */
> unsigned cb_color_info;
> @@ -252,7 +253,10 @@ struct r600_surface {
> unsigned cb_color_fmask_slice; /* EG and later */
> unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
> unsigned cb_color_mask; /* R600 only */
> - unsigned spi_shader_col_format; /* SI+ */
> + unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */
> + unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */
> + unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */
> + unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
> unsigned sx_ps_downconvert; /* Stoney only */
> unsigned sx_blend_opt_epsilon; /* Stoney only */
> struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 75a9d56..a93887e 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
> enum pipe_format format = int_to_norm_format(info->dst.format);
> unsigned sample_mask = ~0;
>
> + /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
> + * the format is R16G16. Use R16A16, which does work.
> + */
> + if (format == PIPE_FORMAT_R16G16_UNORM)
> + format = PIPE_FORMAT_R16A16_UNORM;
> + if (format == PIPE_FORMAT_R16G16_SNORM)
> + format = PIPE_FORMAT_R16A16_SNORM;
> +
> if (info->src.resource->nr_samples > 1 &&
> info->dst.resource->nr_samples <= 1 &&
> util_max_layer(info->src.resource, 0) == 0 &&
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index e2009de..e2725fe 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -126,6 +126,10 @@ struct si_framebuffer {
> unsigned cb0_is_integer;
> unsigned compressed_cb_mask;
> unsigned spi_shader_col_format;
> + unsigned spi_shader_col_format_alpha;
> + unsigned spi_shader_col_format_blend;
> + unsigned spi_shader_col_format_blend_alpha;
> + unsigned color_is_int8; /* bitmask */
> unsigned dirty_cbufs;
> bool dirty_zsbuf;
> };
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 492d3f9..42f5291 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
> S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
> S_028B70_ALPHA_TO_MASK_OFFSET3(2));
>
> + if (state->alpha_to_coverage)
> + blend->need_src_alpha_4bit |= 0xf;
> +
> blend->cb_target_mask = 0;
> for (int i = 0; i < 8; i++) {
> /* state->rt entries > 0 only written if independent blending */
> @@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
> blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
> }
> si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
> +
> + blend->blend_enable_4bit |= 0xf << (i * 4);
> +
> + /* This is only important for formats without alpha. */
> + if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
> + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
> + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
> + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
> + srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
> + dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
> + blend->need_src_alpha_4bit |= 0xf << (i * 4);
> }
>
> if (blend->cb_target_mask) {
> @@ -1270,53 +1284,6 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
> }
> }
>
> -/* Returns the size in bits of the widest component of a CB format */
> -static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
> -{
> - switch(colorformat) {
> - case V_028C70_COLOR_4_4_4_4:
> - return 4;
> -
> - case V_028C70_COLOR_1_5_5_5:
> - case V_028C70_COLOR_5_5_5_1:
> - return 5;
> -
> - case V_028C70_COLOR_5_6_5:
> - return 6;
> -
> - case V_028C70_COLOR_8:
> - case V_028C70_COLOR_8_8:
> - case V_028C70_COLOR_8_8_8_8:
> - return 8;
> -
> - case V_028C70_COLOR_10_10_10_2:
> - case V_028C70_COLOR_2_10_10_10:
> - return 10;
> -
> - case V_028C70_COLOR_10_11_11:
> - case V_028C70_COLOR_11_11_10:
> - return 11;
> -
> - case V_028C70_COLOR_16:
> - case V_028C70_COLOR_16_16:
> - case V_028C70_COLOR_16_16_16_16:
> - return 16;
> -
> - case V_028C70_COLOR_8_24:
> - case V_028C70_COLOR_24_8:
> - return 24;
> -
> - case V_028C70_COLOR_32:
> - case V_028C70_COLOR_32_32:
> - case V_028C70_COLOR_32_32_32_32:
> - case V_028C70_COLOR_X24_8_32_FLOAT:
> - return 32;
> - }
> -
> - assert(!"Unknown maximum component size");
> - return 0;
> -}
> -
> static uint32_t si_translate_dbformat(enum pipe_format format)
> {
> switch (format) {
> @@ -1886,17 +1853,119 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten
>
> static void si_choose_spi_color_formats(struct r600_surface *surf,
> unsigned format, unsigned swap,
> - unsigned ntype)
> + unsigned ntype, bool is_depth)
> {
> - unsigned max_comp_size = si_colorformat_max_comp_size(format);
> + /* Alpha is needed for alpha-to-coverage.
> + * Blending may be with or without alpha.
> + */
> + unsigned normal = 0; /* most optimal, may not support blending or export alpha */
> + unsigned alpha = 0; /* exports alpha, but may not support blending */
> + unsigned blend = 0; /* supports blending, but may not export alpha */
> + unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
>
> - surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
> + /* Choose the SPI color formats. These are required values for Stoney/RB+.
> + * Other chips have multiple choices, though they are not necessarily better.
> + */
> + switch (format) {
> + case V_028C70_COLOR_5_6_5:
> + case V_028C70_COLOR_1_5_5_5:
> + case V_028C70_COLOR_5_5_5_1:
> + case V_028C70_COLOR_4_4_4_4:
> + case V_028C70_COLOR_10_11_11:
> + case V_028C70_COLOR_11_11_10:
> + case V_028C70_COLOR_8:
> + case V_028C70_COLOR_8_8:
> + case V_028C70_COLOR_8_8_8_8:
> + case V_028C70_COLOR_10_10_10_2:
> + case V_028C70_COLOR_2_10_10_10:
> + if (ntype == V_028C70_NUMBER_UINT)
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
Hi,
The documentation for the "BLEND_BYPASS" bit says it must be set when a
SINT or UINT number type is used.
From that I deduce that blending is not possible with these formats, so I
guess the UINT16/SINT16 export formats cannot be used here for blend and
blend_alpha.
Yours,
Axel
> + else if (ntype == V_028C70_NUMBER_SINT)
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
> + else
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
> + break;
> +
> + case V_028C70_COLOR_16:
> + case V_028C70_COLOR_16_16:
> + case V_028C70_COLOR_16_16_16_16:
> + if (ntype == V_028C70_NUMBER_UNORM ||
> + ntype == V_028C70_NUMBER_SNORM) {
> + /* UNORM16 and SNORM16 don't support blending */
> + if (ntype == V_028C70_NUMBER_UNORM)
> + normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
> + else
> + normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
> +
> + /* Use 32 bits per channel for blending. */
> + if (format == V_028C70_COLOR_16) {
> + if (swap == V_028C70_SWAP_STD) { /* R */
> + blend = V_028714_SPI_SHADER_32_R;
> + blend_alpha = V_028714_SPI_SHADER_32_AR;
> + } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
> + blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
> + else
> + assert(0);
> + } else if (format == V_028C70_COLOR_16_16) {
> + if (swap == V_028C70_SWAP_STD) { /* RG */
> + blend = V_028714_SPI_SHADER_32_GR;
> + blend_alpha = V_028714_SPI_SHADER_32_ABGR;
> + } else if (swap == V_028C70_SWAP_ALT) /* RA */
> + blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
> + else
> + assert(0);
> + } else /* 16_16_16_16 */
> + blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
> + } else if (ntype == V_028C70_NUMBER_UINT)
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
> + else if (ntype == V_028C70_NUMBER_SINT)
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
> + else if (ntype == V_028C70_NUMBER_FLOAT)
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
> + else
> + assert(0);
> + break;
>
> - if (ntype == V_028C70_NUMBER_SRGB ||
> - ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
> - max_comp_size <= 10) ||
> - (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16))
> - surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR;
> + case V_028C70_COLOR_32:
> + if (swap == V_028C70_SWAP_STD) { /* R */
> + blend = normal = V_028714_SPI_SHADER_32_R;
> + alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
> + } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
> + else
> + assert(0);
> + break;
> +
> + case V_028C70_COLOR_32_32:
> + if (swap == V_028C70_SWAP_STD) { /* RG */
> + blend = normal = V_028714_SPI_SHADER_32_GR;
> + alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
> + } else if (swap == V_028C70_SWAP_ALT) /* RA */
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
> + else
> + assert(0);
> + break;
> +
> + case V_028C70_COLOR_32_32_32_32:
> + case V_028C70_COLOR_8_24:
> + case V_028C70_COLOR_24_8:
> + case V_028C70_COLOR_X24_8_32_FLOAT:
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
> + break;
> +
> + default:
> + assert(0);
> + return;
> + }
> +
> + /* The DB->CB copy needs 32_ABGR. */
> + if (is_depth)
> + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
> +
> + surf->spi_shader_col_format = normal;
> + surf->spi_shader_col_format_alpha = alpha;
> + surf->spi_shader_col_format_blend = blend;
> + surf->spi_shader_col_format_blend_alpha = blend_alpha;
> }
>
> static void si_initialize_color_surface(struct si_context *sctx,
> @@ -1989,6 +2058,12 @@ static void si_initialize_color_surface(struct si_context *sctx,
> blend_bypass = 1;
> }
>
> + if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
> + (format == V_028C70_COLOR_8 ||
> + format == V_028C70_COLOR_8_8 ||
> + format == V_028C70_COLOR_8_8_8_8))
> + surf->color_is_int8 = true;
> +
> color_info = S_028C70_FORMAT(format) |
> S_028C70_COMP_SWAP(swap) |
> S_028C70_BLEND_CLAMP(blend_clamp) |
> @@ -2068,7 +2143,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
> }
>
> /* Determine pixel shader export format */
> - si_choose_spi_color_formats(surf, format, swap, ntype);
> + si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
>
> if (sctx->b.family == CHIP_STONEY &&
> !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
> @@ -2296,6 +2371,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
> util_copy_framebuffer_state(&sctx->framebuffer.state, state);
>
> sctx->framebuffer.spi_shader_col_format = 0;
> + sctx->framebuffer.spi_shader_col_format_alpha = 0;
> + sctx->framebuffer.spi_shader_col_format_blend = 0;
> + sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
> + sctx->framebuffer.color_is_int8 = 0;
> +
> sctx->framebuffer.compressed_cb_mask = 0;
> sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
> sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
> @@ -2318,6 +2398,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>
> sctx->framebuffer.spi_shader_col_format |=
> surf->spi_shader_col_format << (i * 4);
> + sctx->framebuffer.spi_shader_col_format_alpha |=
> + surf->spi_shader_col_format_alpha << (i * 4);
> + sctx->framebuffer.spi_shader_col_format_blend |=
> + surf->spi_shader_col_format_blend << (i * 4);
> + sctx->framebuffer.spi_shader_col_format_blend_alpha |=
> + surf->spi_shader_col_format_blend_alpha << (i * 4);
> +
> + if (surf->color_is_int8)
> + sctx->framebuffer.color_is_int8 |= 1 << i;
>
> if (rtex->fmask.size && rtex->cmask.size) {
> sctx->framebuffer.compressed_cb_mask |= 1 << i;
> @@ -2328,6 +2417,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
> if (i == 1 && surf) {
> sctx->framebuffer.spi_shader_col_format |=
> surf->spi_shader_col_format << (i * 4);
> + sctx->framebuffer.spi_shader_col_format_alpha |=
> + surf->spi_shader_col_format_alpha << (i * 4);
> + sctx->framebuffer.spi_shader_col_format_blend |=
> + surf->spi_shader_col_format_blend << (i * 4);
> + sctx->framebuffer.spi_shader_col_format_blend_alpha |=
> + surf->spi_shader_col_format_blend_alpha << (i * 4);
> }
>
> if (state->zsbuf) {
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 46ba3c4..be3488e 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -42,6 +42,11 @@ struct si_state_blend {
> bool alpha_to_coverage;
> bool alpha_to_one;
> bool dual_src_blend;
> + /* Set 0xf or 0x0 (4 bits) per render target if the following is
> + * true. ANDed with spi_shader_col_format.
> + */
> + unsigned blend_enable_4bit;
> + unsigned need_src_alpha_4bit;
> };
>
> struct si_state_rasterizer {
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 158f1ce..80126f2 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -616,7 +616,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
> sel->info.colors_written == 0x1)
> key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
>
> - key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
> + if (blend) {
> + /* Select the shader color format based on whether
> + * blending or alpha are needed.
> + */
> + key->ps.spi_shader_col_format =
> + (blend->blend_enable_4bit & blend->need_src_alpha_4bit &
> + sctx->framebuffer.spi_shader_col_format_blend_alpha) |
> + (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
> + sctx->framebuffer.spi_shader_col_format_blend) |
> + (~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
> + sctx->framebuffer.spi_shader_col_format_alpha) |
> + (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
> + sctx->framebuffer.spi_shader_col_format);
> + } else
> + key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
>
> /* If alpha-to-coverage is enabled, we have to export alpha
> * even if there is no color buffer.
> @@ -625,6 +639,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
> blend && blend->alpha_to_coverage)
> key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
>
> + /* On SI and CIK except Hawaii, the CB doesn't clamp outputs
> + * to the range supported by the type if a channel has less
> + * than 16 bits and the export format is 16_ABGR.
> + */
> + if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
> + key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
> +
> if (rs) {
> bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
> sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||
More information about the mesa-dev mailing list