[Mesa-dev] [PATCH] radeonsi: implement RB+ for Stoney (v2)

Alex Deucher alexdeucher at gmail.com
Thu Dec 10 09:19:12 PST 2015


On Wed, Dec 9, 2015 at 5:35 PM, Marek Olšák <maraeo at gmail.com> wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> v2: fix dual source blending

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c |   1 +
>  src/gallium/drivers/radeon/r600_pipe_common.h |   3 +
>  src/gallium/drivers/radeon/r600_texture.c     |   6 +
>  src/gallium/drivers/radeonsi/si_state.c       | 159 +++++++++++++++++++++++++-
>  src/gallium/drivers/radeonsi/sid.h            |   3 +
>  5 files changed, 170 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
> index 8899ba4..ba541ac 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -375,6 +375,7 @@ static const struct debug_named_value common_debug_options[] = {
>         { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
>         { "nodcc", DBG_NO_DCC, "Disable DCC." },
>         { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
> +       { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
>
>         DEBUG_NAMED_VALUE_END /* must be last */
>  };
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 8c6c0c3..dd23ed5 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -86,6 +86,7 @@
>  #define DBG_CHECK_VM           (1llu << 42)
>  #define DBG_NO_DCC             (1llu << 43)
>  #define DBG_NO_DCC_CLEAR       (1llu << 44)
> +#define DBG_NO_RB_PLUS         (1llu << 45)
>
>  #define R600_MAP_BUFFER_ALIGNMENT 64
>
> @@ -250,6 +251,8 @@ struct r600_surface {
>         unsigned cb_color_fmask_slice;  /* EG and later */
>         unsigned cb_color_cmask;        /* CB_COLORn_TILE (r600 only) */
>         unsigned cb_color_mask;         /* R600 only */
> +       unsigned sx_ps_downconvert;     /* Stoney only */
> +       unsigned sx_blend_opt_epsilon;  /* Stoney only */
>         struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
>         struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
>
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 774722f..8c145e5 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -1393,6 +1393,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>                 return;
>
>         for (i = 0; i < fb->nr_cbufs; i++) {
> +               struct r600_surface *surf;
>                 struct r600_texture *tex;
>                 unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
>
> @@ -1403,6 +1404,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>                 if (!(*buffers & clear_bit))
>                         continue;
>
> +               surf = (struct r600_surface *)fb->cbufs[i];
>                 tex = (struct r600_texture *)fb->cbufs[i]->texture;
>
>                 /* 128-bit formats are unusupported */
> @@ -1449,6 +1451,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>                         if (clear_words_needed)
>                                 tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
>                 } else {
> +                       /* RB+ doesn't work with CMASK fast clear. */
> +                       if (surf->sx_ps_downconvert)
> +                               continue;
> +
>                         /* ensure CMASK is enabled */
>                         r600_texture_alloc_cmask_separate(rctx->screen, tex);
>                         if (tex->cmask.size == 0) {
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 2ebfa1c..dcf4a7b 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -347,10 +347,54 @@ static uint32_t si_translate_blend_factor(int blend_fact)
>         return 0;
>  }
>
> +static uint32_t si_translate_blend_opt_function(int blend_func)
> +{
> +       switch (blend_func) {
> +       case PIPE_BLEND_ADD:
> +               return V_028760_OPT_COMB_ADD;
> +       case PIPE_BLEND_SUBTRACT:
> +               return V_028760_OPT_COMB_SUBTRACT;
> +       case PIPE_BLEND_REVERSE_SUBTRACT:
> +               return V_028760_OPT_COMB_REVSUBTRACT;
> +       case PIPE_BLEND_MIN:
> +               return V_028760_OPT_COMB_MIN;
> +       case PIPE_BLEND_MAX:
> +               return V_028760_OPT_COMB_MAX;
> +       default:
> +               return V_028760_OPT_COMB_BLEND_DISABLED;
> +       }
> +}
> +
> +static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
> +{
> +       switch (blend_fact) {
> +       case PIPE_BLENDFACTOR_ZERO:
> +               return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
> +       case PIPE_BLENDFACTOR_ONE:
> +               return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
> +       case PIPE_BLENDFACTOR_SRC_COLOR:
> +               return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
> +                               : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
> +       case PIPE_BLENDFACTOR_INV_SRC_COLOR:
> +               return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
> +                               : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
> +       case PIPE_BLENDFACTOR_SRC_ALPHA:
> +               return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
> +       case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
> +               return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
> +       case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
> +               return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
> +                               : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
> +       default:
> +               return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
> +       }
> +}
> +
>  static void *si_create_blend_state_mode(struct pipe_context *ctx,
>                                         const struct pipe_blend_state *state,
>                                         unsigned mode)
>  {
> +       struct si_context *sctx = (struct si_context*)ctx;
>         struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
>         struct si_pm4_state *pm4 = &blend->pm4;
>
> @@ -416,8 +460,47 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
>         } else {
>                 color_control |= S_028808_MODE(V_028808_CB_DISABLE);
>         }
> -       si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
>
> +       if (sctx->b.family == CHIP_STONEY) {
> +               uint32_t sx_blend_opt_control = 0;
> +
> +               for (int i = 0; i < 8; i++) {
> +                       const int j = state->independent_blend_enable ? i : 0;
> +
> +                       /* TODO: We can also set this if the surface doesn't contain RGB. */
> +                       if (!state->rt[j].blend_enable ||
> +                           !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
> +                               sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
> +
> +                       /* TODO: We can also set this if the surface doesn't contain alpha. */
> +                       if (!state->rt[j].blend_enable ||
> +                           !(state->rt[j].colormask & PIPE_MASK_A))
> +                               sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
> +
> +                       if (!state->rt[j].blend_enable) {
> +                               si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
> +                                              S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
> +                                              S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
> +                               continue;
> +                       }
> +
> +                       si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
> +                               S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
> +                               S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
> +                               S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
> +                               S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
> +                               S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
> +                               S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
> +               }
> +
> +               si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
> +
> +               /* RB+ doesn't work with dual source blending */
> +               if (blend->dual_src_blend)
> +                       color_control |= S_028808_DISABLE_DUAL_QUAD(1);
> +       }
> +
> +       si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
>         return blend;
>  }
>
> @@ -1057,6 +1140,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
>         if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
>                 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
>
> +       if (sctx->b.family == CHIP_STONEY &&
> +           sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
> +               db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
> +
>         radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
>                                db_shader_control);
>  }
> @@ -1993,6 +2080,61 @@ static void si_initialize_color_surface(struct si_context *sctx,
>                 surf->export_16bpc = true;
>         }
>
> +       if (sctx->b.family == CHIP_STONEY &&
> +           !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
> +               switch (desc->channel[0].size) {
> +               case 32:
> +                       if (desc->nr_channels == 1) {
> +                               if (swap == V_0280A0_SWAP_STD)
> +                                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
> +                               else if (swap == V_0280A0_SWAP_ALT_REV)
> +                                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
> +                       }
> +                       break;
> +               case 16:
> +                       /* For 1-channel formats, use the superset thereof. */
> +                       if (desc->nr_channels <= 2) {
> +                               if (swap == V_0280A0_SWAP_STD ||
> +                                   swap == V_0280A0_SWAP_STD_REV)
> +                                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
> +                               else
> +                                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
> +                       }
> +                       break;
> +               case 11:
> +                       if (desc->nr_channels == 3) {
> +                               surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
> +                               surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
> +                       }
> +                       break;
> +               case 10:
> +                       if (desc->nr_channels == 4) {
> +                               surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
> +                               surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
> +                       }
> +                       break;
> +               case 8:
> +                       /* For 1 and 2-channel formats, use the superset thereof. */
> +                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
> +                       surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
> +                       break;
> +               case 5:
> +                       if (desc->nr_channels == 3) {
> +                               surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
> +                               surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
> +                       } else if (desc->nr_channels == 4) {
> +                               surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
> +                               surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
> +                       }
> +                       break;
> +               case 4:
> +                       /* For 1 and 2-channel formats, use the superset thereof. */
> +                       surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
> +                       surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
> +                       break;
> +               }
> +       }
> +
>         surf->color_initialized = true;
>  }
>
> @@ -2260,6 +2402,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
>         unsigned i, nr_cbufs = state->nr_cbufs;
>         struct r600_texture *tex = NULL;
>         struct r600_surface *cb = NULL;
> +       uint32_t sx_ps_downconvert = 0;
> +       uint32_t sx_blend_opt_epsilon = 0;
>
>         /* Colorbuffers. */
>         for (i = 0; i < nr_cbufs; i++) {
> @@ -2310,18 +2454,29 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
>
>                 if (sctx->b.chip_class >= VI)
>                         radeon_emit(cs, cb->cb_dcc_base);       /* R_028C94_CB_COLOR0_DCC_BASE */
> +
> +               sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
> +               sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
>         }
>         /* set CB_COLOR1_INFO for possible dual-src blending */
>         if (i == 1 && state->cbufs[0] &&
>             sctx->framebuffer.dirty_cbufs & (1 << 0)) {
>                 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
>                                        cb->cb_color_info | tex->cb_color_info);
> +               sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
> +               sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
>                 i++;
>         }
>         for (; i < 8 ; i++)
>                 if (sctx->framebuffer.dirty_cbufs & (1 << i))
>                         radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
>
> +       if (sctx->b.family == CHIP_STONEY) {
> +               radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
> +               radeon_emit(cs, sx_ps_downconvert);     /* R_028754_SX_PS_DOWNCONVERT */
> +               radeon_emit(cs, sx_blend_opt_epsilon);  /* R_028758_SX_BLEND_OPT_EPSILON */
> +       }
> +
>         /* ZS buffer. */
>         if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
>                 struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
> @@ -3486,7 +3641,7 @@ static void si_init_config(struct si_context *sctx)
>         }
>
>         if (sctx->b.family == CHIP_STONEY)
> -               si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
> +               si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
>
>         si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
>         if (sctx->b.chip_class >= CIK)
> diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
> index 7866d58..9e1e158 100644
> --- a/src/gallium/drivers/radeonsi/sid.h
> +++ b/src/gallium/drivers/radeonsi/sid.h
> @@ -6771,6 +6771,9 @@
>  #define   G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)                  (((x) >> 27) & 0x1)
>  #define   C_028804_ENABLE_POSTZ_OVERRASTERIZATION                     0xF7FFFFFF
>  #define R_028808_CB_COLOR_CONTROL                                       0x028808
> +#define   S_028808_DISABLE_DUAL_QUAD(x)                               (((x) & 0x1) << 0)
> +#define   G_028808_DISABLE_DUAL_QUAD(x)                               (((x) >> 0) & 0x1)
> +#define   C_028808_DISABLE_DUAL_QUAD                                  0xFFFFFFFE
>  #define   S_028808_DEGAMMA_ENABLE(x)                                  (((x) & 0x1) << 3)
>  #define   G_028808_DEGAMMA_ENABLE(x)                                  (((x) >> 3) & 0x1)
>  #define   C_028808_DEGAMMA_ENABLE                                     0xFFFFFFF7
> --
> 2.1.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list