[Mesa-dev] [PATCH 3/3] radeonsi: add a workaround for the Witcher 2 black transitions

Wed Jan 4 12:45:20 UTC 2017

Here's my theory about the Witcher 2 issue:

The problematic shader contains KILL. Reloading inputs after KILL is
unsafe because KILL breaks WQM (whole quad mode), so the inputs are no
longer loaded for the whole quad. Control flow statements have a similar
issue.

Inputs can be reloaded safely at each use when the following condition holds:

the shader doesn't use WQM ||
(the reload doesn't take place inside a conditional block &&
 the reload is before KILL)

Marek

On Tue, Jan 3, 2017 at 8:17 PM, Marek Olšák <maraeo at gmail.com> wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>  src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> index 996a458..efe28d1 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> @@ -629,20 +629,29 @@ store_value_to_array(struct lp_build_tgsi_context *bld_base,
>                         default:
>                                 continue;
>                         }
>                         value = LLVMBuildExtractElement(builder, array,
>                                 lp_build_const_int32(gallivm, i), "");
>                         LLVMBuildStore(builder, value, temp_ptr);
>                 }
>         }
>  }
>
> +/* If this is 1, preload FS inputs at the beginning of shaders. Otherwise,
> + * reload them at each use.
> + *
> + * This must be 1 for Witcher 2 to render correctly. The cause of the Witcher 2
> + * issue is still unknown. I only know that M0 is correct throughout the whole
> + * shader.
> + */
> +#define SI_PRELOAD_FS_INPUTS 1
> +
>  LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
>                                 const struct tgsi_full_src_register *reg,
>                                 enum tgsi_opcode_type type,
>                                 unsigned swizzle)
>  {
>         struct si_shader_context *ctx = si_shader_context(bld_base);
>         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
>         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>         LLVMValueRef result = NULL, ptr, ptr2;
>
> @@ -681,21 +690,22 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
>
>         case TGSI_FILE_INPUT: {
>                 unsigned index = reg->Register.Index;
>                 LLVMValueRef input[4];
>
>                 /* I don't think doing this for vertex shaders is beneficial.
>                  * For those, we want to make sure the VMEM loads are executed
>                  * only once. Fragment shaders don't care much, because
>                  * v_interp instructions are much cheaper than VMEM loads.
>                  */
> -               if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
> +               if (!SI_PRELOAD_FS_INPUTS &&
> +                   ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
>                         ctx->load_input(ctx, index, &ctx->input_decls[index], input);
>                 else
>                         memcpy(input, &ctx->inputs[index * 4], sizeof(input));
>
>                 result = input[swizzle];
>
>                 if (tgsi_type_is_64bit(type)) {
>                         ptr = result;
>                         ptr2 = input[swizzle + 1];
>                         return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
> @@ -874,21 +884,22 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
>         {
>                 unsigned idx;
>                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
>                         if (ctx->load_input &&
>                             ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
>                                 ctx->input_decls[idx] = *decl;
>                                 ctx->input_decls[idx].Range.First = idx;
>                                 ctx->input_decls[idx].Range.Last = idx;
>                                 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
>
> -                               if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
> +                               if (SI_PRELOAD_FS_INPUTS ||
> +                                   bld_base->info->processor != PIPE_SHADER_FRAGMENT)
>                                         ctx->load_input(ctx, idx, &ctx->input_decls[idx],
>                                                         &ctx->inputs[idx * 4]);
>                         }
>                 }
>         }
>         break;
>
>         case TGSI_FILE_SYSTEM_VALUE:
>         {
>                 unsigned idx;
> --
> 2.7.4
>