[Mesa-dev] [PATCH 1/2] radeonsi/ac: move vertex export remove to common code.
Juan A. Suarez Romero
jasuarez at igalia.com
Thu Apr 27 08:50:20 UTC 2017
On Wed, 2017-04-26 at 09:12 +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This code can be shared by radv, we bump the max to
> VARYING_SLOT_MAX here, but that shouldn't have too
> much fallout.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/amd/common/ac_exp_param.h | 40 ++++++
> src/amd/common/ac_llvm_build.c | 156 +++++++++++++++++++++++-
> src/amd/common/ac_llvm_build.h | 6 +
> src/amd/common/ac_llvm_helper.cpp | 20 +++
> src/amd/common/ac_llvm_util.h | 2 +
> src/gallium/drivers/radeonsi/si_shader.c | 152 ++---------------------
> src/gallium/drivers/radeonsi/si_shader.h | 12 --
> src/gallium/drivers/radeonsi/si_state_shaders.c | 13 +-
> 8 files changed, 237 insertions(+), 164 deletions(-)
> create mode 100644 src/amd/common/ac_exp_param.h
>
> diff --git a/src/amd/common/ac_exp_param.h b/src/amd/common/ac_exp_param.h
> new file mode 100644
> index 0000000..b97ce81
> --- /dev/null
> +++ b/src/amd/common/ac_exp_param.h
> @@ -0,0 +1,40 @@
> +/*
> + * Copyright 2014 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + */
> +#ifndef AC_EXP_PARAM_H
> +#define AC_EXP_PARAM_H
> +
> +enum {
> + /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
> + AC_EXP_PARAM_OFFSET_0 = 0,
> + AC_EXP_PARAM_OFFSET_31 = 31,
> + /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
> + AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
> + AC_EXP_PARAM_DEFAULT_VAL_0001,
> + AC_EXP_PARAM_DEFAULT_VAL_1110,
> + AC_EXP_PARAM_DEFAULT_VAL_1111,
> + AC_EXP_PARAM_UNDEFINED = 255,
> +};
> +
> +#endif
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index d45094c..f452f3e 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -33,11 +33,13 @@
> #include <stdio.h>
>
> #include "ac_llvm_util.h"
> -
> +#include "ac_exp_param.h"
> #include "util/bitscan.h"
> #include "util/macros.h"
> #include "sid.h"
>
> +#include "shader_enums.h"
> +
> /* Initialize module-independent parts of the context.
> *
> * The caller is responsible for initializing ctx::module and ctx::builder.
> @@ -1244,3 +1246,155 @@ void ac_get_image_intr_name(const char *base_name,
> data_type_name, coords_type_name, rsrc_type_name);
> }
> }
> +
> +#define AC_EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
> +#define AC_EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
> +
> +/* Return true if the PARAM export has been eliminated. */
> +static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
> + uint32_t num_outputs,
> + LLVMValueRef inst, unsigned offset)
> +{
> + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
> + bool is_zero[4] = {}, is_one[4] = {};
> +
> + for (i = 0; i < 4; i++) {
> + LLVMBool loses_info;
> + LLVMValueRef p = LLVMGetOperand(inst, AC_EXP_OUT0 + i);
> +
> + /* It's a constant expression. Undef outputs are eliminated too. */
> + if (LLVMIsUndef(p)) {
> + is_zero[i] = true;
> + is_one[i] = true;
> + } else if (LLVMIsAConstantFP(p)) {
> + double a = LLVMConstRealGetDouble(p, &loses_info);
> +
> + if (a == 0)
> + is_zero[i] = true;
> + else if (a == 1)
> + is_one[i] = true;
> + else
> + return false; /* other constant */
> + } else
> + return false;
> + }
> +
> + /* Only certain combinations of 0 and 1 can be eliminated. */
> + if (is_zero[0] && is_zero[1] && is_zero[2])
> + default_val = is_zero[3] ? 0 : 1;
> + else if (is_one[0] && is_one[1] && is_one[2])
> + default_val = is_zero[3] ? 2 : 3;
> + else
> + return false;
> +
> + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
> + LLVMInstructionEraseFromParent(inst);
> +
> + /* Change OFFSET to DEFAULT_VAL. */
> + for (i = 0; i < num_outputs; i++) {
> + if (vs_output_param_offset[i] == offset) {
> + vs_output_param_offset[i] =
> + AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
> + break;
> + }
> + }
> + return true;
> +}
> +
> +struct ac_vs_exports {
> + unsigned num;
> + unsigned offset[VARYING_SLOT_MAX];
> + LLVMValueRef inst[VARYING_SLOT_MAX];
> +};
> +
> +void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ctx,
> + LLVMValueRef main_fn,
> + uint8_t *vs_output_param_offset,
> + uint32_t num_outputs,
> + uint8_t *num_param_exports)
> +{
> + LLVMBasicBlockRef bb;
> + bool removed_any = false;
> + struct ac_vs_exports exports;
> +
> + assert(num_outputs < VARYING_SLOT_MAX);
> + exports.num = 0;
> +
> + /* Process all LLVM instructions. */
> + bb = LLVMGetFirstBasicBlock(main_fn);
> + while (bb) {
> + LLVMValueRef inst = LLVMGetFirstInstruction(bb);
> +
> + while (inst) {
> + LLVMValueRef cur = inst;
> + inst = LLVMGetNextInstruction(inst);
> +
> + if (LLVMGetInstructionOpcode(cur) != LLVMCall)
> + continue;
> +
> + LLVMValueRef callee = ac_llvm_get_called_value(cur);
> +
> + if (!ac_llvm_is_function(callee))
> + continue;
> +
> + const char *name = LLVMGetValueName(callee);
> + unsigned num_args = LLVMCountParams(callee);
> +
> + /* Check if this is an export instruction. */
> + if ((num_args != 9 && num_args != 8) ||
> + (strcmp(name, "llvm.SI.export") &&
> + strcmp(name, "llvm.amdgcn.exp.f32")))
> + continue;
> +
> + LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
> + unsigned target = LLVMConstIntGetZExtValue(arg);
> +
> + if (target < V_008DFC_SQ_EXP_PARAM)
> + continue;
> +
> + target -= V_008DFC_SQ_EXP_PARAM;
> +
> + /* Eliminate constant value PARAM exports. */
> + if (ac_eliminate_const_output(vs_output_param_offset,
> + num_outputs, cur, target)) {
> + removed_any = true;
> + } else {
> + exports.offset[exports.num] = target;
> + exports.inst[exports.num] = cur;
> + exports.num++;
> + }
> + }
> + bb = LLVMGetNextBasicBlock(bb);
> + }
> +
> + /* Remove holes in export memory due to removed PARAM exports.
> + * This is done by renumbering all PARAM exports.
> + */
> + if (removed_any) {
> + uint8_t current_offset[VARYING_SLOT_MAX];
> + unsigned new_count = 0;
> + unsigned out, i;
> +
> + /* Make a copy of the offsets. We need the old version while
> + * we are modifying some of them. */
> + memcpy(current_offset, vs_output_param_offset,
> + sizeof(current_offset));
> +
> + for (i = 0; i < exports.num; i++) {
> + unsigned offset = exports.offset[i];
> +
> + for (out = 0; out < num_outputs; out++) {
> + if (current_offset[out] != offset)
> + continue;
> +
> + LLVMSetOperand(exports.inst[i], AC_EXP_TARGET,
> + LLVMConstInt(ctx->i32,
> + V_008DFC_SQ_EXP_PARAM + new_count, 0));
> + vs_output_param_offset[out] = new_count;
> + new_count++;
> + break;
> + }
> + }
> + *num_param_exports = new_count;
> + }
> +}
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index d6edcde..1c3610a 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -239,6 +239,12 @@ void ac_get_image_intr_name(const char *base_name,
> LLVMTypeRef coords_type,
> LLVMTypeRef rsrc_type,
> char *out_name, unsigned out_len);
> +
> +void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ac,
> + LLVMValueRef main_fn,
> + uint8_t *vs_output_param_offset,
> + uint32_t num_outputs,
> + uint8_t *num_param_exports);
> #ifdef __cplusplus
> }
> #endif
> diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp
> index 11fa809..582a8f7 100644
> --- a/src/amd/common/ac_llvm_helper.cpp
> +++ b/src/amd/common/ac_llvm_helper.cpp
> @@ -61,3 +61,23 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
> return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
> AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
> }
> +
> +LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
> +{
> +#if HAVE_LLVM >= 0x0309
> + return LLVMGetCalledValue(call);
> +#elif HAVE_LLVM >= 0x0305
> + return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue());
This patch breaks the build when using LLVM 3.8.1.
> +#else
> + return NULL; /* radeonsi doesn't support so old LLVM. */
> +#endif
> +}
> +
> +bool ac_llvm_is_function(LLVMValueRef v)
> +{
> +#if HAVE_LLVM >= 0x0309
> + return LLVMGetValueKind(v) == LLVMFunctionValueKind;
> +#else
> + return llvm::isa<llvm::Function>(llvm::unwrap(v));
> +#endif
> +}
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index faecf1e..38e7dde 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -64,6 +64,8 @@ void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
> unsigned attrib_mask);
> void ac_dump_module(LLVMModuleRef module);
>
> +LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
> +bool ac_llvm_is_function(LLVMValueRef v);
> #ifdef __cplusplus
> }
> #endif
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 5d7175d..27d88b1 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -41,6 +41,7 @@
>
> #include "ac_binary.h"
> #include "ac_llvm_util.h"
> +#include "ac_exp_param.h"
> #include "si_shader_internal.h"
> #include "si_pipe.h"
> #include "sid.h"
> @@ -6793,76 +6794,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
> bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
> }
>
> -#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
> -#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
> -
> -/* Return true if the PARAM export has been eliminated. */
> -static bool si_eliminate_const_output(struct si_shader_context *ctx,
> - LLVMValueRef inst, unsigned offset)
> -{
> - struct si_shader *shader = ctx->shader;
> - unsigned num_outputs = shader->selector->info.num_outputs;
> - unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
> - bool is_zero[4] = {}, is_one[4] = {};
> -
> - for (i = 0; i < 4; i++) {
> - LLVMBool loses_info;
> - LLVMValueRef p = LLVMGetOperand(inst, EXP_OUT0 + i);
> -
> - /* It's a constant expression. Undef outputs are eliminated too. */
> - if (LLVMIsUndef(p)) {
> - is_zero[i] = true;
> - is_one[i] = true;
> - } else if (LLVMIsAConstantFP(p)) {
> - double a = LLVMConstRealGetDouble(p, &loses_info);
> -
> - if (a == 0)
> - is_zero[i] = true;
> - else if (a == 1)
> - is_one[i] = true;
> - else
> - return false; /* other constant */
> - } else
> - return false;
> - }
> -
> - /* Only certain combinations of 0 and 1 can be eliminated. */
> - if (is_zero[0] && is_zero[1] && is_zero[2])
> - default_val = is_zero[3] ? 0 : 1;
> - else if (is_one[0] && is_one[1] && is_one[2])
> - default_val = is_zero[3] ? 2 : 3;
> - else
> - return false;
> -
> - /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
> - LLVMInstructionEraseFromParent(inst);
> -
> - /* Change OFFSET to DEFAULT_VAL. */
> - for (i = 0; i < num_outputs; i++) {
> - if (shader->info.vs_output_param_offset[i] == offset) {
> - shader->info.vs_output_param_offset[i] =
> - EXP_PARAM_DEFAULT_VAL_0000 + default_val;
> - break;
> - }
> - }
> - return true;
> -}
> -
> -struct si_vs_exports {
> - unsigned num;
> - unsigned offset[SI_MAX_VS_OUTPUTS];
> - LLVMValueRef inst[SI_MAX_VS_OUTPUTS];
> -};
> -
> static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
> {
> struct si_shader *shader = ctx->shader;
> struct tgsi_shader_info *info = &shader->selector->info;
> - LLVMBasicBlockRef bb;
> - struct si_vs_exports exports;
> - bool removed_any = false;
> -
> - exports.num = 0;
>
> if (ctx->type == PIPE_SHADER_FRAGMENT ||
> ctx->type == PIPE_SHADER_COMPUTE ||
> @@ -6870,84 +6805,11 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
> shader->key.as_ls)
> return;
>
> - /* Process all LLVM instructions. */
> - bb = LLVMGetFirstBasicBlock(ctx->main_fn);
> - while (bb) {
> - LLVMValueRef inst = LLVMGetFirstInstruction(bb);
> -
> - while (inst) {
> - LLVMValueRef cur = inst;
> - inst = LLVMGetNextInstruction(inst);
> -
> - if (LLVMGetInstructionOpcode(cur) != LLVMCall)
> - continue;
> -
> - LLVMValueRef callee = lp_get_called_value(cur);
> -
> - if (!lp_is_function(callee))
> - continue;
> -
> - const char *name = LLVMGetValueName(callee);
> - unsigned num_args = LLVMCountParams(callee);
> -
> - /* Check if this is an export instruction. */
> - if ((num_args != 9 && num_args != 8) ||
> - (strcmp(name, "llvm.SI.export") &&
> - strcmp(name, "llvm.amdgcn.exp.f32")))
> - continue;
> -
> - LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET);
> - unsigned target = LLVMConstIntGetZExtValue(arg);
> -
> - if (target < V_008DFC_SQ_EXP_PARAM)
> - continue;
> -
> - target -= V_008DFC_SQ_EXP_PARAM;
> -
> - /* Eliminate constant value PARAM exports. */
> - if (si_eliminate_const_output(ctx, cur, target)) {
> - removed_any = true;
> - } else {
> - exports.offset[exports.num] = target;
> - exports.inst[exports.num] = cur;
> - exports.num++;
> - }
> - }
> - bb = LLVMGetNextBasicBlock(bb);
> - }
> -
> - /* Remove holes in export memory due to removed PARAM exports.
> - * This is done by renumbering all PARAM exports.
> - */
> - if (removed_any) {
> - ubyte current_offset[SI_MAX_VS_OUTPUTS];
> - unsigned new_count = 0;
> - unsigned out, i;
> -
> - /* Make a copy of the offsets. We need the old version while
> - * we are modifying some of them. */
> - assert(sizeof(current_offset) ==
> - sizeof(shader->info.vs_output_param_offset));
> - memcpy(current_offset, shader->info.vs_output_param_offset,
> - sizeof(current_offset));
> -
> - for (i = 0; i < exports.num; i++) {
> - unsigned offset = exports.offset[i];
> -
> - for (out = 0; out < info->num_outputs; out++) {
> - if (current_offset[out] != offset)
> - continue;
> -
> - LLVMSetOperand(exports.inst[i], EXP_TARGET,
> - LLVMConstInt(ctx->i32,
> - V_008DFC_SQ_EXP_PARAM + new_count, 0));
> - shader->info.vs_output_param_offset[out] = new_count;
> - new_count++;
> - break;
> - }
> - }
> - shader->info.nr_param_exports = new_count;
> - }
> + ac_eliminate_const_vs_outputs(&ctx->ac,
> + ctx->main_fn,
> + shader->info.vs_output_param_offset,
> + info->num_outputs,
> + &shader->info.nr_param_exports);
> }
>
> static void si_count_scratch_private_memory(struct si_shader_context *ctx)
> @@ -7521,7 +7383,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
> si_init_shader_ctx(&ctx, sscreen, shader, tm);
> ctx.separate_prolog = !is_monolithic;
>
> - memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
> + memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
> sizeof(shader->info.vs_output_param_offset));
>
> shader->info.uses_instanceid = sel->info.uses_instanceid;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index cfa691b..2dfb567 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -486,18 +486,6 @@ struct si_shader_config {
> unsigned rsrc2;
> };
>
> -enum {
> - /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
> - EXP_PARAM_OFFSET_0 = 0,
> - EXP_PARAM_OFFSET_31 = 31,
> - /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
> - EXP_PARAM_DEFAULT_VAL_0000 = 64,
> - EXP_PARAM_DEFAULT_VAL_0001,
> - EXP_PARAM_DEFAULT_VAL_1110,
> - EXP_PARAM_DEFAULT_VAL_1111,
> - EXP_PARAM_UNDEFINED = 255,
> -};
> -
> /* GCN-specific shader info. */
> struct si_shader_info {
> ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 21185c3..baf1eae 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -39,6 +39,7 @@
>
> #include "util/disk_cache.h"
> #include "util/mesa-sha1.h"
> +#include "ac_exp_param.h"
>
> /* SHADER_CACHE */
>
> @@ -1506,7 +1507,7 @@ void si_init_shader_selector_async(void *job, int thread_index)
> for (i = 0; i < sel->info.num_outputs; i++) {
> unsigned offset = shader->info.vs_output_param_offset[i];
>
> - if (offset <= EXP_PARAM_OFFSET_31)
> + if (offset <= AC_EXP_PARAM_OFFSET_31)
> continue;
>
> unsigned name = sel->info.output_semantic_name[i];
> @@ -2001,18 +2002,18 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
> index == vsinfo->output_semantic_index[j]) {
> offset = vs->info.vs_output_param_offset[j];
>
> - if (offset <= EXP_PARAM_OFFSET_31) {
> + if (offset <= AC_EXP_PARAM_OFFSET_31) {
> /* The input is loaded from parameter memory. */
> ps_input_cntl |= S_028644_OFFSET(offset);
> } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
> - if (offset == EXP_PARAM_UNDEFINED) {
> + if (offset == AC_EXP_PARAM_UNDEFINED) {
> /* This can happen with depth-only rendering. */
> offset = 0;
> } else {
> /* The input is a DEFAULT_VAL constant. */
> - assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
> - offset <= EXP_PARAM_DEFAULT_VAL_1111);
> - offset -= EXP_PARAM_DEFAULT_VAL_0000;
> + assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
> + offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
> + offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
> }
>
> ps_input_cntl = S_028644_OFFSET(0x20) |
More information about the mesa-dev mailing list