[Mesa-dev] [PATCH] radeonsi: disable CE by default
Dieter Nützel
Dieter at nuetzel-hh.de
Tue Aug 15 14:48:55 UTC 2017
Hello Marek,
FWIW: Tested-by: Dieter... ;-)
I saw, you've committed it all the way.
But here are some numbers for the RX580 for you.
It is not only a 'very small' amount here.
Xeon X3470, ~3 GHz, 4/8 Cores, 24 GB
with R600_DEBUG=ce
glmark2
(final result) goes _down_ by ~5.6%
single tests in the range of 2 - 8%
(worst is [texture] texture-filter=nearest) - or _better_ ;-)
Best results _ever_.
Kernel
self compiled
4.13.0-rc2-1.g7262353-default+ (amd-staging-drm-next)
and
openSUSE Kernel:stable
4.12.7-1.g7dae241-default (which shows the GREATEST numbers)
So I have to figure out which optimizations the openSUSE kernel team
uses... --- Hello Jiri, Stefan?!
Congrats to the whole 'AMD (Linux) Vega Team',
very impressive numbers on day-0.
I hope RX5xx sees some further improvements, too.
Cheers,
Dieter
Am 13.08.2017 19:27, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
>
> It makes performance worse by a very small (hard to measure) amount.
> We've done extensive profiling of this feature internally.
>
> Cc: 17.1 17.2 <mesa-stable at lists.freedesktop.org>
> ---
> src/gallium/drivers/radeon/r600_pipe_common.c | 1 +
> src/gallium/drivers/radeon/r600_pipe_common.h | 4 ++--
> src/gallium/drivers/radeonsi/si_pipe.c | 24
> ++++++++++++++++++------
> 3 files changed, 21 insertions(+), 8 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
> b/src/gallium/drivers/radeon/r600_pipe_common.c
> index 0038c9a..cb4b7a4 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -768,20 +768,21 @@ static const struct debug_named_value
> common_debug_options[] = {
> { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on
> end-of-packet." },
> { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all
> operations when possible." },
> { "precompile", DBG_PRECOMPILE, "Compile one shader variant at
> shader creation." },
> { "nowc", DBG_NO_WC, "Disable GTT write combining" },
> { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
> { "nodcc", DBG_NO_DCC, "Disable DCC." },
> { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
> { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." },
> { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction
> Scheduler." },
> { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders
> compiled on demand" },
> + { "ce", DBG_CE, "Force enable the constant engine" },
> { "noce", DBG_NO_CE, "Disable the constant engine"},
> { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader
> optimizations" },
> { "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main
> framebuffer" },
>
> DEBUG_NAMED_VALUE_END /* must be last */
> };
>
> static const char* r600_get_vendor(struct pipe_screen* pscreen)
> {
> return "X.Org";
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 67b3c87..14bc63e 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -58,26 +58,26 @@
> #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
> /* Pipeline & streamout query controls. */
> #define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
> #define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
> #define R600_CONTEXT_PRIVATE_FLAG (1u << 3)
>
> /* special primitive types */
> #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
>
> /* Debug flags. */
> -/* logging */
> +/* logging and features */
> #define DBG_TEX (1 << 0)
> #define DBG_NIR (1 << 1)
> #define DBG_COMPUTE (1 << 2)
> #define DBG_VM (1 << 3)
> -/* gap - reuse */
> +#define DBG_CE (1 << 4)
> /* shader logging */
> #define DBG_FS (1 << 5)
> #define DBG_VS (1 << 6)
> #define DBG_GS (1 << 7)
> #define DBG_PS (1 << 8)
> #define DBG_CS (1 << 9)
> #define DBG_TCS (1 << 10)
> #define DBG_TES (1 << 11)
> #define DBG_NO_IR (1 << 12)
> #define DBG_NO_TGSI (1 << 13)
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 2c65cc8..cac1d01 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -194,26 +194,38 @@ static struct pipe_context
> *si_create_context(struct pipe_screen *screen,
> sctx->b.b.create_video_codec = si_uvd_create_decoder;
> sctx->b.b.create_video_buffer = si_video_buffer_create;
> } else {
> sctx->b.b.create_video_codec = vl_create_decoder;
> sctx->b.b.create_video_buffer = vl_video_buffer_create;
> }
>
> sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
> si_context_gfx_flush, sctx);
>
> - /* SI + AMDGPU + CE = GPU hang */
> - if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib &&
> - sscreen->b.chip_class != SI &&
> - /* These can't use CE due to a power gating bug in the kernel. */
> - sscreen->b.family != CHIP_CARRIZO &&
> - sscreen->b.family != CHIP_STONEY) {
> + bool enable_ce = sscreen->b.chip_class != SI && /* SI hangs */
> + /* These can't use CE due to a power gating bug in the kernel. */
> + sscreen->b.family != CHIP_CARRIZO &&
> + sscreen->b.family != CHIP_STONEY;
> +
> + /* CE is currently disabled by default, because it makes s_load
> latency
> + * worse, because CE IB doesn't run in lockstep with DE.
> + * Remove this line after that performance issue has been resolved.
> + */
> + enable_ce = false;
> +
> + /* Apply CE overrides. */
> + if (sscreen->b.debug_flags & DBG_NO_CE)
> + enable_ce = false;
> + else if (sscreen->b.debug_flags & DBG_CE)
> + enable_ce = true;
> +
> + if (ws->cs_add_const_ib && enable_ce) {
> sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
> if (!sctx->ce_ib)
> goto fail;
>
> if (ws->cs_add_const_preamble_ib) {
> sctx->ce_preamble_ib =
> ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);
>
> if (!sctx->ce_preamble_ib)
> goto fail;
More information about the mesa-dev
mailing list