[Mesa-dev] [PATCH 4/5] amd/common: remove has_ds_bpermute argument from ac_build_ddxy

Connor Abbott cwabbott0 at gmail.com
Wed Sep 13 18:45:31 UTC 2017


Not sure if we'll want to do this, since we'll need to
effectively revert it anyway when we implement derivatives with DPP
(although we'll have to rename has_ds_bpermute to has_dpp...).

On Wed, Sep 13, 2017 at 1:04 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> ---
>  src/amd/common/ac_llvm_build.c           | 3 +--
>  src/amd/common/ac_llvm_build.h           | 1 -
>  src/amd/common/ac_nir_to_llvm.c          | 5 +----
>  src/gallium/drivers/radeonsi/si_pipe.c   | 1 -
>  src/gallium/drivers/radeonsi/si_pipe.h   | 1 -
>  src/gallium/drivers/radeonsi/si_shader.c | 3 +--
>  6 files changed, 3 insertions(+), 11 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 4077bd81bbc..6c010e8c3a6 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -965,29 +965,28 @@ ac_get_thread_id(struct ac_llvm_context *ctx)
>   * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
>   * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
>   * the current pixel's column, and masking with 0xfffffffe yields the TID
>   * of the left pixel of the current pixel's row.
>   *
>   * Adding 1 yields the TID of the pixel to the right of the left pixel, and
>   * adding 2 yields the TID of the pixel below the top pixel.
>   */
>  LLVMValueRef
>  ac_build_ddxy(struct ac_llvm_context *ctx,
> -             bool has_ds_bpermute,
>               uint32_t mask,
>               int idx,
>               LLVMValueRef val)
>  {
>         LLVMValueRef tl, trbl, args[2];
>         LLVMValueRef result;
>
> -       if (has_ds_bpermute) {
> +       if (ctx->chip_class >= VI) {
>                 LLVMValueRef thread_id, tl_tid, trbl_tid;
>                 thread_id = ac_get_thread_id(ctx);
>
>                 tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
>                                       LLVMConstInt(ctx->i32, mask, false), "");
>
>                 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
>                                         LLVMConstInt(ctx->i32, idx, false), "");
>
>                 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index b6434893cfa..3f93551330c 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -187,21 +187,20 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
>
>  LLVMValueRef
>  ac_get_thread_id(struct ac_llvm_context *ctx);
>
>  #define AC_TID_MASK_TOP_LEFT 0xfffffffc
>  #define AC_TID_MASK_TOP      0xfffffffd
>  #define AC_TID_MASK_LEFT     0xfffffffe
>
>  LLVMValueRef
>  ac_build_ddxy(struct ac_llvm_context *ctx,
> -             bool has_ds_bpermute,
>               uint32_t mask,
>               int idx,
>               LLVMValueRef val);
>
>  #define AC_SENDMSG_GS 2
>  #define AC_SENDMSG_GS_DONE 3
>
>  #define AC_SENDMSG_GS_OP_NOP      (0 << 4)
>  #define AC_SENDMSG_GS_OP_CUT      (1 << 4)
>  #define AC_SENDMSG_GS_OP_EMIT     (2 << 4)
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index c0c4441022a..bf4b3ca6521 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1407,40 +1407,37 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
>         return result;
>  }
>
>  static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>                               nir_op op,
>                               LLVMValueRef src0)
>  {
>         unsigned mask;
>         int idx;
>         LLVMValueRef result;
> -       bool has_ds_bpermute = ctx->abi->chip_class >= VI;
>
>         if (op == nir_op_fddx_fine || op == nir_op_fddx)
>                 mask = AC_TID_MASK_LEFT;
>         else if (op == nir_op_fddy_fine || op == nir_op_fddy)
>                 mask = AC_TID_MASK_TOP;
>         else
>                 mask = AC_TID_MASK_TOP_LEFT;
>
>         /* for DDX we want to next X pixel, DDY next Y pixel. */
>         if (op == nir_op_fddx_fine ||
>             op == nir_op_fddx_coarse ||
>             op == nir_op_fddx)
>                 idx = 1;
>         else
>                 idx = 2;
>
> -       result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
> -                             mask, idx,
> -                             src0);
> +       result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
>         return result;
>  }
>
>  /*
>   * this takes an I,J coordinate pair,
>   * and works out the X and Y derivatives.
>   * it returns DDX(I), DDX(J), DDY(I), DDY(J).
>   */
>  static LLVMValueRef emit_ddxy_interp(
>         struct ac_nir_context *ctx,
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index ca2e055a90e..bb1362f1cfc 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -1037,21 +1037,20 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
>                 (sscreen->b.chip_class == VI &&
>                  sscreen->b.info.pfp_fw_version >= 121 &&
>                  sscreen->b.info.me_fw_version >= 87) ||
>                 (sscreen->b.chip_class == CIK &&
>                  sscreen->b.info.pfp_fw_version >= 211 &&
>                  sscreen->b.info.me_fw_version >= 173) ||
>                 (sscreen->b.chip_class == SI &&
>                  sscreen->b.info.pfp_fw_version >= 79 &&
>                  sscreen->b.info.me_fw_version >= 142);
>
> -       sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
>         sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
>                                             sscreen->b.family <= CHIP_POLARIS12) ||
>                                            sscreen->b.family == CHIP_VEGA10 ||
>                                            sscreen->b.family == CHIP_RAVEN;
>         sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 &&
>                                 !(sscreen->b.debug_flags & DBG_NO_DPBB);
>         sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
>                                 !(sscreen->b.debug_flags & DBG_NO_DFSM);
>
>         /* While it would be nice not to have this flag, we are constrained
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 8db7028c9a1..10215a35886 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -87,21 +87,20 @@ struct si_compute;
>  struct hash_table;
>  struct u_suballocator;
>
>  struct si_screen {
>         struct r600_common_screen       b;
>         unsigned                        gs_table_depth;
>         unsigned                        tess_offchip_block_dw_size;
>         bool                            has_clear_state;
>         bool                            has_distributed_tess;
>         bool                            has_draw_indirect_multi;
> -       bool                            has_ds_bpermute;
>         bool                            has_msaa_sample_loc_bug;
>         bool                            dpbb_allowed;
>         bool                            dfsm_allowed;
>         bool                            llvm_has_working_vgpr_indexing;
>
>         /* Whether shaders are monolithic (1-part) or separate (3-part). */
>         bool                            use_monolithic_shaders;
>         bool                            record_llvm_ir;
>
>         mtx_t                   shader_parts_mutex;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index c4e7f225a8f..aea199d3efd 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -3646,22 +3646,21 @@ static void si_llvm_emit_ddxy(
>                 mask = AC_TID_MASK_LEFT;
>         else if (opcode == TGSI_OPCODE_DDY_FINE)
>                 mask = AC_TID_MASK_TOP;
>         else
>                 mask = AC_TID_MASK_TOP_LEFT;
>
>         /* for DDX we want to next X pixel, DDY next Y pixel. */
>         idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
>
>         val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
> -       val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
> -                           mask, idx, val);
> +       val = ac_build_ddxy(&ctx->ac, mask, idx, val);
>         emit_data->output[emit_data->chan] = val;
>  }
>
>  /*
>   * this takes an I,J coordinate pair,
>   * and works out the X and Y derivatives.
>   * it returns DDX(I), DDX(J), DDY(I), DDY(J).
>   */
>  static LLVMValueRef si_llvm_emit_ddxy_interp(
>         struct lp_build_tgsi_context *bld_base,
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list