[Mesa-dev] [PATCH 9/9] radeonsi: add an environment variable that forces EQAA for MSAA allocations
Nicolai Hähnle
nhaehnle at gmail.com
Mon May 7 12:16:21 UTC 2018
Very nice. We should probably think about exposing this as a more
generally available performance knob if you're confident enough that it
works.
On 03.05.2018 00:42, Marek Olšák wrote:
> FYI, the environment variable will only have effect on amdgpu.
If I understand the code correctly, the environment variable *will* have
an effect on radeon, but it likely just won't work there because the
surfaces aren't computed correctly.
So it seems like a good idea to add an explicit check for that. With that,
patches 5-9 are:
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Marek
>
> On Wed, May 2, 2018 at 12:13 AM, Marek Olšák <maraeo at gmail.com
> <mailto:maraeo at gmail.com>> wrote:
>
> From: Marek Olšák <marek.olsak at amd.com <mailto:marek.olsak at amd.com>>
>
> This is for testing and experiments.
> ---
> src/gallium/drivers/radeonsi/si_pipe.c | 22 ++++++++++++++++
> src/gallium/drivers/radeonsi/si_pipe.h | 3 +++
> src/gallium/drivers/radeonsi/si_state.c | 5 ++++
> src/gallium/drivers/radeonsi/si_texture.c | 31 +++++++++++++++++++----
> 4 files changed, 56 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 1ca38ed55cb..35c2c200e57 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -1065,20 +1065,42 @@ struct pipe_screen
> *radeonsi_screen_create(struct radeon_winsys *ws,
> sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
> SI_CONTEXT_INV_VMEM_L1;
> if (sscreen->info.chip_class <= VI) {
> sscreen->barrier_flags.cp_to_L2 |=
> SI_CONTEXT_INV_GLOBAL_L2;
> sscreen->barrier_flags.L2_to_cp |=
> SI_CONTEXT_WRITEBACK_GLOBAL_L2;
> }
>
> if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
> sscreen->debug_flags |= DBG_ALL_SHADERS;
>
> + /* Syntax:
> + * EQAA=s,z,c
> + * Example:
> + * EQAA=8,4,2
> +
> + * That means 8 coverage samples, 4 Z/S samples, and 2 color
> samples.
> + * Constraints:
> + * s >= z >= c (ignoring this only wastes memory)
> + * s = [2..16]
> + * z = [2..8]
> + * c = [2..8]
> + *
> + * Only MSAA color and depth buffers are overriden.
> + */
> + const char *eqaa = debug_get_option("EQAA", NULL);
> + unsigned s,z,f;
> + if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s
> && z && f) {
> + sscreen->eqaa_force_coverage_samples = s;
> + sscreen->eqaa_force_z_samples = z;
> + sscreen->eqaa_force_color_samples = f;
> + }
> +
> for (i = 0; i < num_comp_hi_threads; i++)
> si_init_compiler(sscreen, &sscreen->compiler[i]);
> for (i = 0; i < num_comp_lo_threads; i++)
> si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
>
> /* Create the auxiliary context. This must be done last. */
> sscreen->aux_context = si_create_context(&sscreen->b, 0);
>
> if (sscreen->debug_flags & DBG(TEST_DMA))
> si_test_dma(sscreen);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> b/src/gallium/drivers/radeonsi/si_pipe.h
> index 55a135f3870..6917d5e6068 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -409,20 +409,23 @@ struct si_screen {
>
> struct radeon_info info;
> uint64_t debug_flags;
> char renderer_string[100];
>
> unsigned gs_table_depth;
> unsigned tess_offchip_block_dw_size;
> unsigned tess_offchip_ring_size;
> unsigned tess_factor_ring_size;
> unsigned vgt_hs_offchip_param;
> + unsigned eqaa_force_coverage_samples;
> + unsigned eqaa_force_z_samples;
> + unsigned eqaa_force_color_samples;
> bool has_clear_state;
> bool has_distributed_tess;
> bool has_draw_indirect_multi;
> bool has_out_of_order_rast;
> bool assume_no_z_fights;
> bool commutative_blend_add;
> bool clear_db_cache_before_clear;
> bool has_msaa_sample_loc_bug;
> bool has_ls_vgpr_init_bug;
> bool dpbb_allowed;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c
> b/src/gallium/drivers/radeonsi/si_state.c
> index e133bf28589..c7585b285e9 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2112,20 +2112,21 @@ static bool si_is_zs_format_supported(enum
> pipe_format format)
> {
> return si_translate_dbformat(format) != V_028040_Z_INVALID;
> }
>
> static boolean si_is_format_supported(struct pipe_screen *screen,
> enum pipe_format format,
> enum pipe_texture_target target,
> unsigned sample_count,
> unsigned usage)
> {
> + struct si_screen *sscreen = (struct si_screen *)screen;
> unsigned retval = 0;
>
> if (target >= PIPE_MAX_TEXTURE_TYPES) {
> PRINT_ERR("r600: unsupported texture type %d\n",
> target);
> return false;
> }
>
> if (!util_format_is_supported(format, usage))
> return false;
>
> @@ -2135,20 +2136,24 @@ static boolean si_is_format_supported(struct
> pipe_screen *screen,
>
> if (usage & PIPE_BIND_SHADER_IMAGE)
> return false;
>
> switch (sample_count) {
> case 2:
> case 4:
> case 8:
> break;
> case 16:
> + /* Allow resource_copy_region with
> nr_samples == 16. */
> + if (sscreen->eqaa_force_coverage_samples ==
> 16 &&
> + !util_format_is_depth_or_stencil(format))
> + return true;
> if (format == PIPE_FORMAT_NONE)
> return true;
> else
> return false;
> default:
> return false;
> }
> }
>
> if (usage & (PIPE_BIND_SAMPLER_VIEW |
> diff --git a/src/gallium/drivers/radeonsi/si_texture.c
> b/src/gallium/drivers/radeonsi/si_texture.c
> index 52b8b87732f..804708e0516 100644
> --- a/src/gallium/drivers/radeonsi/si_texture.c
> +++ b/src/gallium/drivers/radeonsi/si_texture.c
> @@ -1380,47 +1380,68 @@ si_choose_tiling(struct si_screen *sscreen,
>
> /* Make small textures 1D tiled. */
> if (templ->width0 <= 16 || templ->height0 <= 16 ||
> (sscreen->debug_flags & DBG(NO_2D_TILING)))
> return RADEON_SURF_MODE_1D;
>
> /* The allocator will switch to 1D if needed. */
> return RADEON_SURF_MODE_2D;
> }
>
> -static unsigned si_get_num_color_samples(const struct pipe_resource
> *templ,
> +static unsigned si_get_num_color_samples(struct si_screen *sscreen,
> + const struct pipe_resource
> *templ,
> bool imported)
> {
> + if (!imported && templ->nr_samples >= 2 &&
> + sscreen->eqaa_force_color_samples)
> + return sscreen->eqaa_force_color_samples;
> +
> return CLAMP(templ->nr_samples, 1, 8);
> }
>
> struct pipe_resource *si_texture_create(struct pipe_screen *screen,
> const struct pipe_resource
> *templ)
> {
> struct si_screen *sscreen = (struct si_screen*)screen;
> + bool is_zs = util_format_is_depth_or_stencil(templ->format);
> +
> + if (templ->nr_samples >= 2) {
> + /* This is hackish (overwriting the const
> pipe_resource template),
> + * but should be harmless and state trackers can
> also see
> + * the overriden number of samples in the created
> pipe_resource.
> + */
> + if (is_zs && sscreen->eqaa_force_z_samples) {
> + ((struct pipe_resource*)templ)->nr_samples =
> + sscreen->eqaa_force_z_samples;
> + } else if (!is_zs &&
> sscreen->eqaa_force_color_samples) {
> + ((struct pipe_resource*)templ)->nr_samples =
> + sscreen->eqaa_force_coverage_samples;
> + }
> + }
> +
> struct radeon_surf surface = {0};
> bool is_flushed_depth = templ->flags &
> SI_RESOURCE_FLAG_FLUSHED_DEPTH;
> bool tc_compatible_htile =
> sscreen->info.chip_class >= VI &&
> /* There are issues with TC-compatible HTILE on
> Tonga (and
> * Iceland is the same design), and documented bug
> workarounds
> * don't help. For example, this fails:
> * piglit/bin/tex-miplevel-selection 'texture()'
> 2DShadow -auto
> */
> sscreen->info.family != CHIP_TONGA &&
> sscreen->info.family != CHIP_ICELAND &&
> (templ->flags &
> PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
> !(sscreen->debug_flags & DBG(NO_HYPERZ)) &&
> !is_flushed_depth &&
> templ->nr_samples <= 1 && /* TC-compat HTILE is
> less efficient with MSAA */
> - util_format_is_depth_or_stencil(templ->format);
> - unsigned num_color_samples = si_get_num_color_samples(templ,
> false);
> + is_zs;
> + unsigned num_color_samples =
> si_get_num_color_samples(sscreen, templ, false);
> int r;
>
> r = si_init_surface(sscreen, &surface, templ,
> num_color_samples,
> si_choose_tiling(sscreen, templ,
> tc_compatible_htile),
> 0, 0, false, false, is_flushed_depth,
> tc_compatible_htile);
> if (r) {
> return NULL;
> }
>
> @@ -1450,21 +1471,21 @@ static struct pipe_resource
> *si_texture_from_handle(struct pipe_screen *screen,
> return NULL;
>
> buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle,
> &stride, &offset);
> if (!buf)
> return NULL;
>
> sscreen->ws->buffer_get_metadata(buf, &metadata);
> si_surface_import_metadata(sscreen, &surface, &metadata,
> &array_mode, &is_scanout);
>
> - unsigned num_color_samples = si_get_num_color_samples(templ,
> true);
> + unsigned num_color_samples =
> si_get_num_color_samples(sscreen, templ, true);
>
> r = si_init_surface(sscreen, &surface, templ,
> num_color_samples,
> array_mode, stride, offset, true,
> is_scanout,
> false, false);
> if (r) {
> return NULL;
> }
>
> rtex = si_texture_create_object(screen, templ,
> num_color_samples,
> buf, &surface);
> @@ -2384,21 +2405,21 @@ si_texture_from_memobj(struct pipe_screen
> *screen,
> * implementation simple.
> *
> * A possible alternative is to attempt to
> reconstruct the
> * tiling information when the TexParameter
> TEXTURE_TILING_EXT
> * is set.
> */
> array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
> is_scanout = false;
> }
>
> - unsigned num_color_samples = si_get_num_color_samples(templ,
> true);
> + unsigned num_color_samples =
> si_get_num_color_samples(sscreen, templ, true);
>
> r = si_init_surface(sscreen, &surface, templ,
> num_color_samples,
> array_mode, memobj->stride, offset, true,
> is_scanout, false, false);
> if (r)
> return NULL;
>
> rtex = si_texture_create_object(screen, templ,
> num_color_samples,
> memobj->buf, &surface);
> if (!rtex)
> --
> 2.17.0
>
>
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list