[Mesa-dev] [PATCH 4/5] amd/common: remove has_ds_bpermute argument from ac_build_ddxy
Nicolai Hähnle
nicolai.haehnle at amd.com
Wed Sep 13 19:54:58 UTC 2017
On 13.09.2017 21:48, Connor Abbott wrote:
> On Wed, Sep 13, 2017 at 3:46 PM, Nicolai Hähnle <nicolai.haehnle at amd.com> wrote:
>> On 13.09.2017 20:45, Connor Abbott wrote:
>>>
>>> Not sure if we'll want to do this, since we'll need to
>>> effectively revert it anyway when we implement derivatives with DPP
>>> (although we'll have to rename has_ds_bpermute to has_dpp...).
>>
>>
>> Is there a reason for not deriving has_dpp from chip_class?
>
> Yeah, good point. Maybe chip_class wasn't available when the field was added?
Right :)
I think this code has moved and been re-shaped quite a bit over the years...
>
>>
>> Cheers,
>> Nicolai
>>
>>
>>>
>>> On Wed, Sep 13, 2017 at 1:04 PM, Nicolai Hähnle <nhaehnle at gmail.com>
>>> wrote:
>>>>
>>>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>>>
>>>> ---
>>>> src/amd/common/ac_llvm_build.c | 3 +--
>>>> src/amd/common/ac_llvm_build.h | 1 -
>>>> src/amd/common/ac_nir_to_llvm.c | 5 +----
>>>> src/gallium/drivers/radeonsi/si_pipe.c | 1 -
>>>> src/gallium/drivers/radeonsi/si_pipe.h | 1 -
>>>> src/gallium/drivers/radeonsi/si_shader.c | 3 +--
>>>> 6 files changed, 3 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/src/amd/common/ac_llvm_build.c
>>>> b/src/amd/common/ac_llvm_build.c
>>>> index 4077bd81bbc..6c010e8c3a6 100644
>>>> --- a/src/amd/common/ac_llvm_build.c
>>>> +++ b/src/amd/common/ac_llvm_build.c
>>>> @@ -965,29 +965,28 @@ ac_get_thread_id(struct ac_llvm_context *ctx)
>>>> * So, masking the TID with 0xfffffffc yields the TID of the top left
>>>> pixel
>>>> * of the quad, masking with 0xfffffffd yields the TID of the top pixel
>>>> of
>>>> * the current pixel's column, and masking with 0xfffffffe yields the
>>>> TID
>>>> * of the left pixel of the current pixel's row.
>>>> *
>>>> * Adding 1 yields the TID of the pixel to the right of the left pixel,
>>>> and
>>>> * adding 2 yields the TID of the pixel below the top pixel.
>>>> */
>>>> LLVMValueRef
>>>> ac_build_ddxy(struct ac_llvm_context *ctx,
>>>> - bool has_ds_bpermute,
>>>> uint32_t mask,
>>>> int idx,
>>>> LLVMValueRef val)
>>>> {
>>>> LLVMValueRef tl, trbl, args[2];
>>>> LLVMValueRef result;
>>>>
>>>> - if (has_ds_bpermute) {
>>>> + if (ctx->chip_class >= VI) {
>>>> LLVMValueRef thread_id, tl_tid, trbl_tid;
>>>> thread_id = ac_get_thread_id(ctx);
>>>>
>>>> tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
>>>> LLVMConstInt(ctx->i32, mask,
>>>> false), "");
>>>>
>>>> trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
>>>> LLVMConstInt(ctx->i32, idx,
>>>> false), "");
>>>>
>>>> args[0] = LLVMBuildMul(ctx->builder, tl_tid,
>>>> diff --git a/src/amd/common/ac_llvm_build.h
>>>> b/src/amd/common/ac_llvm_build.h
>>>> index b6434893cfa..3f93551330c 100644
>>>> --- a/src/amd/common/ac_llvm_build.h
>>>> +++ b/src/amd/common/ac_llvm_build.h
>>>> @@ -187,21 +187,20 @@ LLVMValueRef ac_build_buffer_load_format(struct
>>>> ac_llvm_context *ctx,
>>>>
>>>> LLVMValueRef
>>>> ac_get_thread_id(struct ac_llvm_context *ctx);
>>>>
>>>> #define AC_TID_MASK_TOP_LEFT 0xfffffffc
>>>> #define AC_TID_MASK_TOP 0xfffffffd
>>>> #define AC_TID_MASK_LEFT 0xfffffffe
>>>>
>>>> LLVMValueRef
>>>> ac_build_ddxy(struct ac_llvm_context *ctx,
>>>> - bool has_ds_bpermute,
>>>> uint32_t mask,
>>>> int idx,
>>>> LLVMValueRef val);
>>>>
>>>> #define AC_SENDMSG_GS 2
>>>> #define AC_SENDMSG_GS_DONE 3
>>>>
>>>> #define AC_SENDMSG_GS_OP_NOP (0 << 4)
>>>> #define AC_SENDMSG_GS_OP_CUT (1 << 4)
>>>> #define AC_SENDMSG_GS_OP_EMIT (2 << 4)
>>>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>>>> b/src/amd/common/ac_nir_to_llvm.c
>>>> index c0c4441022a..bf4b3ca6521 100644
>>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>>> @@ -1407,40 +1407,37 @@ static LLVMValueRef emit_unpack_half_2x16(struct
>>>> ac_llvm_context *ctx,
>>>> return result;
>>>> }
>>>>
>>>> static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>>>> nir_op op,
>>>> LLVMValueRef src0)
>>>> {
>>>> unsigned mask;
>>>> int idx;
>>>> LLVMValueRef result;
>>>> - bool has_ds_bpermute = ctx->abi->chip_class >= VI;
>>>>
>>>> if (op == nir_op_fddx_fine || op == nir_op_fddx)
>>>> mask = AC_TID_MASK_LEFT;
>>>> else if (op == nir_op_fddy_fine || op == nir_op_fddy)
>>>> mask = AC_TID_MASK_TOP;
>>>> else
>>>> mask = AC_TID_MASK_TOP_LEFT;
>>>>
>>>> /* for DDX we want to next X pixel, DDY next Y pixel. */
>>>> if (op == nir_op_fddx_fine ||
>>>> op == nir_op_fddx_coarse ||
>>>> op == nir_op_fddx)
>>>> idx = 1;
>>>> else
>>>> idx = 2;
>>>>
>>>> - result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
>>>> - mask, idx,
>>>> - src0);
>>>> + result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
>>>> return result;
>>>> }
>>>>
>>>> /*
>>>> * this takes an I,J coordinate pair,
>>>> * and works out the X and Y derivatives.
>>>> * it returns DDX(I), DDX(J), DDY(I), DDY(J).
>>>> */
>>>> static LLVMValueRef emit_ddxy_interp(
>>>> struct ac_nir_context *ctx,
>>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>>>> b/src/gallium/drivers/radeonsi/si_pipe.c
>>>> index ca2e055a90e..bb1362f1cfc 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>>> @@ -1037,21 +1037,20 @@ struct pipe_screen *radeonsi_screen_create(struct
>>>> radeon_winsys *ws,
>>>> (sscreen->b.chip_class == VI &&
>>>> sscreen->b.info.pfp_fw_version >= 121 &&
>>>> sscreen->b.info.me_fw_version >= 87) ||
>>>> (sscreen->b.chip_class == CIK &&
>>>> sscreen->b.info.pfp_fw_version >= 211 &&
>>>> sscreen->b.info.me_fw_version >= 173) ||
>>>> (sscreen->b.chip_class == SI &&
>>>> sscreen->b.info.pfp_fw_version >= 79 &&
>>>> sscreen->b.info.me_fw_version >= 142);
>>>>
>>>> - sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
>>>> sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >=
>>>> CHIP_POLARIS10 &&
>>>> sscreen->b.family <=
>>>> CHIP_POLARIS12) ||
>>>> sscreen->b.family ==
>>>> CHIP_VEGA10 ||
>>>> sscreen->b.family ==
>>>> CHIP_RAVEN;
>>>> sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 &&
>>>> !(sscreen->b.debug_flags & DBG_NO_DPBB);
>>>> sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
>>>> !(sscreen->b.debug_flags & DBG_NO_DFSM);
>>>>
>>>> /* While it would be nice not to have this flag, we are
>>>> constrained
>>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>>>> b/src/gallium/drivers/radeonsi/si_pipe.h
>>>> index 8db7028c9a1..10215a35886 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>>>> @@ -87,21 +87,20 @@ struct si_compute;
>>>> struct hash_table;
>>>> struct u_suballocator;
>>>>
>>>> struct si_screen {
>>>> struct r600_common_screen b;
>>>> unsigned gs_table_depth;
>>>> unsigned tess_offchip_block_dw_size;
>>>> bool has_clear_state;
>>>> bool has_distributed_tess;
>>>> bool has_draw_indirect_multi;
>>>> - bool has_ds_bpermute;
>>>> bool has_msaa_sample_loc_bug;
>>>> bool dpbb_allowed;
>>>> bool dfsm_allowed;
>>>> bool llvm_has_working_vgpr_indexing;
>>>>
>>>> /* Whether shaders are monolithic (1-part) or separate (3-part).
>>>> */
>>>> bool use_monolithic_shaders;
>>>> bool record_llvm_ir;
>>>>
>>>> mtx_t shader_parts_mutex;
>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>>> index c4e7f225a8f..aea199d3efd 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>>> @@ -3646,22 +3646,21 @@ static void si_llvm_emit_ddxy(
>>>> mask = AC_TID_MASK_LEFT;
>>>> else if (opcode == TGSI_OPCODE_DDY_FINE)
>>>> mask = AC_TID_MASK_TOP;
>>>> else
>>>> mask = AC_TID_MASK_TOP_LEFT;
>>>>
>>>> /* for DDX we want to next X pixel, DDY next Y pixel. */
>>>> idx = (opcode == TGSI_OPCODE_DDX || opcode ==
>>>> TGSI_OPCODE_DDX_FINE) ? 1 : 2;
>>>>
>>>> val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0],
>>>> ctx->i32, "");
>>>> - val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
>>>> - mask, idx, val);
>>>> + val = ac_build_ddxy(&ctx->ac, mask, idx, val);
>>>> emit_data->output[emit_data->chan] = val;
>>>> }
>>>>
>>>> /*
>>>> * this takes an I,J coordinate pair,
>>>> * and works out the X and Y derivatives.
>>>> * it returns DDX(I), DDX(J), DDY(I), DDY(J).
>>>> */
>>>> static LLVMValueRef si_llvm_emit_ddxy_interp(
>>>> struct lp_build_tgsi_context *bld_base,
>>>> --
>>>> 2.11.0
>>>>
>>>> _______________________________________________
>>>> mesa-dev mailing list
>>>> mesa-dev at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>>
More information about the mesa-dev
mailing list