[Mesa-dev] [PATCH] radeonsi: implement RB+ for Stoney (v2)
Alex Deucher
alexdeucher at gmail.com
Thu Dec 10 09:19:12 PST 2015
On Wed, Dec 9, 2015 at 5:35 PM, Marek Olšák <maraeo at gmail.com> wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> v2: fix dual source blending
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> src/gallium/drivers/radeon/r600_pipe_common.c | 1 +
> src/gallium/drivers/radeon/r600_pipe_common.h | 3 +
> src/gallium/drivers/radeon/r600_texture.c | 6 +
> src/gallium/drivers/radeonsi/si_state.c | 159 +++++++++++++++++++++++++-
> src/gallium/drivers/radeonsi/sid.h | 3 +
> 5 files changed, 170 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
> index 8899ba4..ba541ac 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -375,6 +375,7 @@ static const struct debug_named_value common_debug_options[] = {
> { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
> { "nodcc", DBG_NO_DCC, "Disable DCC." },
> { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
> + { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
>
> DEBUG_NAMED_VALUE_END /* must be last */
> };
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 8c6c0c3..dd23ed5 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -86,6 +86,7 @@
> #define DBG_CHECK_VM (1llu << 42)
> #define DBG_NO_DCC (1llu << 43)
> #define DBG_NO_DCC_CLEAR (1llu << 44)
> +#define DBG_NO_RB_PLUS (1llu << 45)
>
> #define R600_MAP_BUFFER_ALIGNMENT 64
>
> @@ -250,6 +251,8 @@ struct r600_surface {
> unsigned cb_color_fmask_slice; /* EG and later */
> unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
> unsigned cb_color_mask; /* R600 only */
> + unsigned sx_ps_downconvert; /* Stoney only */
> + unsigned sx_blend_opt_epsilon; /* Stoney only */
> struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
> struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
>
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 774722f..8c145e5 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -1393,6 +1393,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
> return;
>
> for (i = 0; i < fb->nr_cbufs; i++) {
> + struct r600_surface *surf;
> struct r600_texture *tex;
> unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
>
> @@ -1403,6 +1404,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
> if (!(*buffers & clear_bit))
> continue;
>
> + surf = (struct r600_surface *)fb->cbufs[i];
> tex = (struct r600_texture *)fb->cbufs[i]->texture;
>
> /* 128-bit formats are unusupported */
> @@ -1449,6 +1451,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
> if (clear_words_needed)
> tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
> } else {
> + /* RB+ doesn't work with CMASK fast clear. */
> + if (surf->sx_ps_downconvert)
> + continue;
> +
> /* ensure CMASK is enabled */
> r600_texture_alloc_cmask_separate(rctx->screen, tex);
> if (tex->cmask.size == 0) {
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 2ebfa1c..dcf4a7b 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -347,10 +347,54 @@ static uint32_t si_translate_blend_factor(int blend_fact)
> return 0;
> }
>
> +static uint32_t si_translate_blend_opt_function(int blend_func)
> +{
> + switch (blend_func) {
> + case PIPE_BLEND_ADD:
> + return V_028760_OPT_COMB_ADD;
> + case PIPE_BLEND_SUBTRACT:
> + return V_028760_OPT_COMB_SUBTRACT;
> + case PIPE_BLEND_REVERSE_SUBTRACT:
> + return V_028760_OPT_COMB_REVSUBTRACT;
> + case PIPE_BLEND_MIN:
> + return V_028760_OPT_COMB_MIN;
> + case PIPE_BLEND_MAX:
> + return V_028760_OPT_COMB_MAX;
> + default:
> + return V_028760_OPT_COMB_BLEND_DISABLED;
> + }
> +}
> +
> +static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
> +{
> + switch (blend_fact) {
> + case PIPE_BLENDFACTOR_ZERO:
> + return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
> + case PIPE_BLENDFACTOR_ONE:
> + return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
> + case PIPE_BLENDFACTOR_SRC_COLOR:
> + return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
> + : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
> + case PIPE_BLENDFACTOR_INV_SRC_COLOR:
> + return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
> + : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
> + case PIPE_BLENDFACTOR_SRC_ALPHA:
> + return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
> + case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
> + return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
> + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
> + return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
> + : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
> + default:
> + return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
> + }
> +}
> +
> static void *si_create_blend_state_mode(struct pipe_context *ctx,
> const struct pipe_blend_state *state,
> unsigned mode)
> {
> + struct si_context *sctx = (struct si_context*)ctx;
> struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
> struct si_pm4_state *pm4 = &blend->pm4;
>
> @@ -416,8 +460,47 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
> } else {
> color_control |= S_028808_MODE(V_028808_CB_DISABLE);
> }
> - si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
>
> + if (sctx->b.family == CHIP_STONEY) {
> + uint32_t sx_blend_opt_control = 0;
> +
> + for (int i = 0; i < 8; i++) {
> + const int j = state->independent_blend_enable ? i : 0;
> +
> + /* TODO: We can also set this if the surface doesn't contain RGB. */
> + if (!state->rt[j].blend_enable ||
> + !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
> + sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
> +
> + /* TODO: We can also set this if the surface doesn't contain alpha. */
> + if (!state->rt[j].blend_enable ||
> + !(state->rt[j].colormask & PIPE_MASK_A))
> + sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
> +
> + if (!state->rt[j].blend_enable) {
> + si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
> + S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
> + S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
> + continue;
> + }
> +
> + si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
> + S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
> + S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
> + S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
> + S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
> + S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
> + S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
> + }
> +
> + si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
> +
> + /* RB+ doesn't work with dual source blending */
> + if (blend->dual_src_blend)
> + color_control |= S_028808_DISABLE_DUAL_QUAD(1);
> + }
> +
> + si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
> return blend;
> }
>
> @@ -1057,6 +1140,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
> if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
> db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
>
> + if (sctx->b.family == CHIP_STONEY &&
> + sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
> + db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
> +
> radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
> db_shader_control);
> }
> @@ -1993,6 +2080,61 @@ static void si_initialize_color_surface(struct si_context *sctx,
> surf->export_16bpc = true;
> }
>
> + if (sctx->b.family == CHIP_STONEY &&
> + !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
> + switch (desc->channel[0].size) {
> + case 32:
> + if (desc->nr_channels == 1) {
> + if (swap == V_0280A0_SWAP_STD)
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
> + else if (swap == V_0280A0_SWAP_ALT_REV)
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
> + }
> + break;
> + case 16:
> + /* For 1-channel formats, use the superset thereof. */
> + if (desc->nr_channels <= 2) {
> + if (swap == V_0280A0_SWAP_STD ||
> + swap == V_0280A0_SWAP_STD_REV)
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
> + else
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
> + }
> + break;
> + case 11:
> + if (desc->nr_channels == 3) {
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
> + surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
> + }
> + break;
> + case 10:
> + if (desc->nr_channels == 4) {
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
> + surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
> + }
> + break;
> + case 8:
> + /* For 1 and 2-channel formats, use the superset thereof. */
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
> + surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
> + break;
> + case 5:
> + if (desc->nr_channels == 3) {
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
> + surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
> + } else if (desc->nr_channels == 4) {
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
> + surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
> + }
> + break;
> + case 4:
> + /* For 1 and 2-channel formats, use the superset thereof. */
> + surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
> + surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
> + break;
> + }
> + }
> +
> surf->color_initialized = true;
> }
>
> @@ -2260,6 +2402,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
> unsigned i, nr_cbufs = state->nr_cbufs;
> struct r600_texture *tex = NULL;
> struct r600_surface *cb = NULL;
> + uint32_t sx_ps_downconvert = 0;
> + uint32_t sx_blend_opt_epsilon = 0;
>
> /* Colorbuffers. */
> for (i = 0; i < nr_cbufs; i++) {
> @@ -2310,18 +2454,29 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
>
> if (sctx->b.chip_class >= VI)
> radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
> +
> + sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
> + sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
> }
> /* set CB_COLOR1_INFO for possible dual-src blending */
> if (i == 1 && state->cbufs[0] &&
> sctx->framebuffer.dirty_cbufs & (1 << 0)) {
> radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
> cb->cb_color_info | tex->cb_color_info);
> + sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
> + sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
> i++;
> }
> for (; i < 8 ; i++)
> if (sctx->framebuffer.dirty_cbufs & (1 << i))
> radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
>
> + if (sctx->b.family == CHIP_STONEY) {
> + radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
> + radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
> + radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
> + }
> +
> /* ZS buffer. */
> if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
> struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
> @@ -3486,7 +3641,7 @@ static void si_init_config(struct si_context *sctx)
> }
>
> if (sctx->b.family == CHIP_STONEY)
> - si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
> + si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
>
> si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
> if (sctx->b.chip_class >= CIK)
> diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
> index 7866d58..9e1e158 100644
> --- a/src/gallium/drivers/radeonsi/sid.h
> +++ b/src/gallium/drivers/radeonsi/sid.h
> @@ -6771,6 +6771,9 @@
> #define G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) >> 27) & 0x1)
> #define C_028804_ENABLE_POSTZ_OVERRASTERIZATION 0xF7FFFFFF
> #define R_028808_CB_COLOR_CONTROL 0x028808
> +#define S_028808_DISABLE_DUAL_QUAD(x) (((x) & 0x1) << 0)
> +#define G_028808_DISABLE_DUAL_QUAD(x) (((x) >> 0) & 0x1)
> +#define C_028808_DISABLE_DUAL_QUAD 0xFFFFFFFE
> #define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
> #define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
> #define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7
> --
> 2.1.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list