[Mesa-dev] [PATCH 4/5] amd/common: remove has_ds_bpermute argument from ac_build_ddxy

Connor Abbott cwabbott0 at gmail.com
Wed Sep 13 19:48:43 UTC 2017


On Wed, Sep 13, 2017 at 3:46 PM, Nicolai Hähnle <nicolai.haehnle at amd.com> wrote:
> On 13.09.2017 20:45, Connor Abbott wrote:
>>
>> Not sure if we'll want to do this, since we'll need to
>> effectively revert it anyways when we implement derivatives with DPP
>> (although we'll have to rename has_ds_bpermute to has_dpp...).
>
>
> Is there a reason for not deriving has_dpp from chip_class?

Yeah, good point. Maybe chip_class wasn't available when the field was added?

>
> Cheers,
> Nicolai
>
>
>>
>> On Wed, Sep 13, 2017 at 1:04 PM, Nicolai Hähnle <nhaehnle at gmail.com>
>> wrote:
>>>
>>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>>
>>> ---
>>>   src/amd/common/ac_llvm_build.c           | 3 +--
>>>   src/amd/common/ac_llvm_build.h           | 1 -
>>>   src/amd/common/ac_nir_to_llvm.c          | 5 +----
>>>   src/gallium/drivers/radeonsi/si_pipe.c   | 1 -
>>>   src/gallium/drivers/radeonsi/si_pipe.h   | 1 -
>>>   src/gallium/drivers/radeonsi/si_shader.c | 3 +--
>>>   6 files changed, 3 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/src/amd/common/ac_llvm_build.c
>>> b/src/amd/common/ac_llvm_build.c
>>> index 4077bd81bbc..6c010e8c3a6 100644
>>> --- a/src/amd/common/ac_llvm_build.c
>>> +++ b/src/amd/common/ac_llvm_build.c
>>> @@ -965,29 +965,28 @@ ac_get_thread_id(struct ac_llvm_context *ctx)
>>>    * So, masking the TID with 0xfffffffc yields the TID of the top left
>>> pixel
>>>    * of the quad, masking with 0xfffffffd yields the TID of the top pixel
>>> of
>>>    * the current pixel's column, and masking with 0xfffffffe yields the
>>> TID
>>>    * of the left pixel of the current pixel's row.
>>>    *
>>>    * Adding 1 yields the TID of the pixel to the right of the left pixel,
>>> and
>>>    * adding 2 yields the TID of the pixel below the top pixel.
>>>    */
>>>   LLVMValueRef
>>>   ac_build_ddxy(struct ac_llvm_context *ctx,
>>> -             bool has_ds_bpermute,
>>>                uint32_t mask,
>>>                int idx,
>>>                LLVMValueRef val)
>>>   {
>>>          LLVMValueRef tl, trbl, args[2];
>>>          LLVMValueRef result;
>>>
>>> -       if (has_ds_bpermute) {
>>> +       if (ctx->chip_class >= VI) {
>>>                  LLVMValueRef thread_id, tl_tid, trbl_tid;
>>>                  thread_id = ac_get_thread_id(ctx);
>>>
>>>                  tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
>>>                                        LLVMConstInt(ctx->i32, mask,
>>> false), "");
>>>
>>>                  trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
>>>                                          LLVMConstInt(ctx->i32, idx,
>>> false), "");
>>>
>>>                  args[0] = LLVMBuildMul(ctx->builder, tl_tid,
>>> diff --git a/src/amd/common/ac_llvm_build.h
>>> b/src/amd/common/ac_llvm_build.h
>>> index b6434893cfa..3f93551330c 100644
>>> --- a/src/amd/common/ac_llvm_build.h
>>> +++ b/src/amd/common/ac_llvm_build.h
>>> @@ -187,21 +187,20 @@ LLVMValueRef ac_build_buffer_load_format(struct
>>> ac_llvm_context *ctx,
>>>
>>>   LLVMValueRef
>>>   ac_get_thread_id(struct ac_llvm_context *ctx);
>>>
>>>   #define AC_TID_MASK_TOP_LEFT 0xfffffffc
>>>   #define AC_TID_MASK_TOP      0xfffffffd
>>>   #define AC_TID_MASK_LEFT     0xfffffffe
>>>
>>>   LLVMValueRef
>>>   ac_build_ddxy(struct ac_llvm_context *ctx,
>>> -             bool has_ds_bpermute,
>>>                uint32_t mask,
>>>                int idx,
>>>                LLVMValueRef val);
>>>
>>>   #define AC_SENDMSG_GS 2
>>>   #define AC_SENDMSG_GS_DONE 3
>>>
>>>   #define AC_SENDMSG_GS_OP_NOP      (0 << 4)
>>>   #define AC_SENDMSG_GS_OP_CUT      (1 << 4)
>>>   #define AC_SENDMSG_GS_OP_EMIT     (2 << 4)
>>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>>> b/src/amd/common/ac_nir_to_llvm.c
>>> index c0c4441022a..bf4b3ca6521 100644
>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>> @@ -1407,40 +1407,37 @@ static LLVMValueRef emit_unpack_half_2x16(struct
>>> ac_llvm_context *ctx,
>>>          return result;
>>>   }
>>>
>>>   static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>>>                                nir_op op,
>>>                                LLVMValueRef src0)
>>>   {
>>>          unsigned mask;
>>>          int idx;
>>>          LLVMValueRef result;
>>> -       bool has_ds_bpermute = ctx->abi->chip_class >= VI;
>>>
>>>          if (op == nir_op_fddx_fine || op == nir_op_fddx)
>>>                  mask = AC_TID_MASK_LEFT;
>>>          else if (op == nir_op_fddy_fine || op == nir_op_fddy)
>>>                  mask = AC_TID_MASK_TOP;
>>>          else
>>>                  mask = AC_TID_MASK_TOP_LEFT;
>>>
>>>          /* for DDX we want to next X pixel, DDY next Y pixel. */
>>>          if (op == nir_op_fddx_fine ||
>>>              op == nir_op_fddx_coarse ||
>>>              op == nir_op_fddx)
>>>                  idx = 1;
>>>          else
>>>                  idx = 2;
>>>
>>> -       result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
>>> -                             mask, idx,
>>> -                             src0);
>>> +       result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
>>>          return result;
>>>   }
>>>
>>>   /*
>>>    * this takes an I,J coordinate pair,
>>>    * and works out the X and Y derivatives.
>>>    * it returns DDX(I), DDX(J), DDY(I), DDY(J).
>>>    */
>>>   static LLVMValueRef emit_ddxy_interp(
>>>          struct ac_nir_context *ctx,
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>>> b/src/gallium/drivers/radeonsi/si_pipe.c
>>> index ca2e055a90e..bb1362f1cfc 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>> @@ -1037,21 +1037,20 @@ struct pipe_screen *radeonsi_screen_create(struct
>>> radeon_winsys *ws,
>>>                  (sscreen->b.chip_class == VI &&
>>>                   sscreen->b.info.pfp_fw_version >= 121 &&
>>>                   sscreen->b.info.me_fw_version >= 87) ||
>>>                  (sscreen->b.chip_class == CIK &&
>>>                   sscreen->b.info.pfp_fw_version >= 211 &&
>>>                   sscreen->b.info.me_fw_version >= 173) ||
>>>                  (sscreen->b.chip_class == SI &&
>>>                   sscreen->b.info.pfp_fw_version >= 79 &&
>>>                   sscreen->b.info.me_fw_version >= 142);
>>>
>>> -       sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
>>>          sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >=
>>> CHIP_POLARIS10 &&
>>>                                              sscreen->b.family <=
>>> CHIP_POLARIS12) ||
>>>                                             sscreen->b.family ==
>>> CHIP_VEGA10 ||
>>>                                             sscreen->b.family ==
>>> CHIP_RAVEN;
>>>          sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 &&
>>>                                  !(sscreen->b.debug_flags & DBG_NO_DPBB);
>>>          sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
>>>                                  !(sscreen->b.debug_flags & DBG_NO_DFSM);
>>>
>>>          /* While it would be nice not to have this flag, we are
>>> constrained
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>>> b/src/gallium/drivers/radeonsi/si_pipe.h
>>> index 8db7028c9a1..10215a35886 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>>> @@ -87,21 +87,20 @@ struct si_compute;
>>>   struct hash_table;
>>>   struct u_suballocator;
>>>
>>>   struct si_screen {
>>>          struct r600_common_screen       b;
>>>          unsigned                        gs_table_depth;
>>>          unsigned                        tess_offchip_block_dw_size;
>>>          bool                            has_clear_state;
>>>          bool                            has_distributed_tess;
>>>          bool                            has_draw_indirect_multi;
>>> -       bool                            has_ds_bpermute;
>>>          bool                            has_msaa_sample_loc_bug;
>>>          bool                            dpbb_allowed;
>>>          bool                            dfsm_allowed;
>>>          bool                            llvm_has_working_vgpr_indexing;
>>>
>>>          /* Whether shaders are monolithic (1-part) or separate (3-part).
>>> */
>>>          bool                            use_monolithic_shaders;
>>>          bool                            record_llvm_ir;
>>>
>>>          mtx_t                   shader_parts_mutex;
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>> index c4e7f225a8f..aea199d3efd 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>> @@ -3646,22 +3646,21 @@ static void si_llvm_emit_ddxy(
>>>                  mask = AC_TID_MASK_LEFT;
>>>          else if (opcode == TGSI_OPCODE_DDY_FINE)
>>>                  mask = AC_TID_MASK_TOP;
>>>          else
>>>                  mask = AC_TID_MASK_TOP_LEFT;
>>>
>>>          /* for DDX we want to next X pixel, DDY next Y pixel. */
>>>          idx = (opcode == TGSI_OPCODE_DDX || opcode ==
>>> TGSI_OPCODE_DDX_FINE) ? 1 : 2;
>>>
>>>          val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0],
>>> ctx->i32, "");
>>> -       val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
>>> -                           mask, idx, val);
>>> +       val = ac_build_ddxy(&ctx->ac, mask, idx, val);
>>>          emit_data->output[emit_data->chan] = val;
>>>   }
>>>
>>>   /*
>>>    * this takes an I,J coordinate pair,
>>>    * and works out the X and Y derivatives.
>>>    * it returns DDX(I), DDX(J), DDY(I), DDY(J).
>>>    */
>>>   static LLVMValueRef si_llvm_emit_ddxy_interp(
>>>          struct lp_build_tgsi_context *bld_base,
>>> --
>>> 2.11.0
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>


More information about the mesa-dev mailing list