[Mesa-dev] [PATCH 3/3] radeonsi: add a workaround for the Witcher 2 black transitions

Marek Olšák maraeo at gmail.com
Thu Jan 5 12:56:54 UTC 2017


I'm going to send a v2 of this patch with a different commit message.

Marek

On Wed, Jan 4, 2017 at 1:45 PM, Marek Olšák <maraeo at gmail.com> wrote:
> Here's my theory about the Witcher 2 issue:
>
> The problematic shader contains KILL. Reloading inputs after KILL is
> unsafe because KILL breaks WQM (whole quad mode), so the inputs are no
> longer loaded for the whole quad. Control flow statements have a similar
> issue.
>
> These are the cases when inputs can be reloaded safely at each use:
>
> the shader doesn't use WQM ||
> (the reload doesn't take place inside a conditional block &&
>  the reload is before KILL)
>
> Marek
>
> On Tue, Jan 3, 2017 at 8:17 PM, Marek Olšák <maraeo at gmail.com> wrote:
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> ---
>>  src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 15 +++++++++++++--
>>  1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
>> index 996a458..efe28d1 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
>> @@ -629,20 +629,29 @@ store_value_to_array(struct lp_build_tgsi_context *bld_base,
>>                         default:
>>                                 continue;
>>                         }
>>                         value = LLVMBuildExtractElement(builder, array,
>>                                 lp_build_const_int32(gallivm, i), "");
>>                         LLVMBuildStore(builder, value, temp_ptr);
>>                 }
>>         }
>>  }
>>
>> +/* If this is 1, preload FS inputs at the beginning of shaders. Otherwise,
>> + * reload them at each use.
>> + *
>> + * This must be 1 for Witcher 2 to render correctly. The cause of the Witcher 2
>> + * issue is still unknown. I only know that M0 is correct throughout the whole
>> + * shader.
>> + */
>> +#define SI_PRELOAD_FS_INPUTS 1
>> +
>>  LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
>>                                 const struct tgsi_full_src_register *reg,
>>                                 enum tgsi_opcode_type type,
>>                                 unsigned swizzle)
>>  {
>>         struct si_shader_context *ctx = si_shader_context(bld_base);
>>         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
>>         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>>         LLVMValueRef result = NULL, ptr, ptr2;
>>
>> @@ -681,21 +690,22 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
>>
>>         case TGSI_FILE_INPUT: {
>>                 unsigned index = reg->Register.Index;
>>                 LLVMValueRef input[4];
>>
>>                 /* I don't think doing this for vertex shaders is beneficial.
>>                  * For those, we want to make sure the VMEM loads are executed
>>                  * only once. Fragment shaders don't care much, because
>>                  * v_interp instructions are much cheaper than VMEM loads.
>>                  */
>> -               if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
>> +               if (!SI_PRELOAD_FS_INPUTS &&
>> +                   ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
>>                         ctx->load_input(ctx, index, &ctx->input_decls[index], input);
>>                 else
>>                         memcpy(input, &ctx->inputs[index * 4], sizeof(input));
>>
>>                 result = input[swizzle];
>>
>>                 if (tgsi_type_is_64bit(type)) {
>>                         ptr = result;
>>                         ptr2 = input[swizzle + 1];
>>                         return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
>> @@ -874,21 +884,22 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
>>         {
>>                 unsigned idx;
>>                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
>>                         if (ctx->load_input &&
>>                             ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
>>                                 ctx->input_decls[idx] = *decl;
>>                                 ctx->input_decls[idx].Range.First = idx;
>>                                 ctx->input_decls[idx].Range.Last = idx;
>>                                 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
>>
>> -                               if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
>> +                               if (SI_PRELOAD_FS_INPUTS ||
>> +                                   bld_base->info->processor != PIPE_SHADER_FRAGMENT)
>>                                         ctx->load_input(ctx, idx, &ctx->input_decls[idx],
>>                                                         &ctx->inputs[idx * 4]);
>>                         }
>>                 }
>>         }
>>         break;
>>
>>         case TGSI_FILE_SYSTEM_VALUE:
>>         {
>>                 unsigned idx;
>> --
>> 2.7.4
>>


More information about the mesa-dev mailing list