[Mesa-dev] [PATCH 10/14] glsl_to_tgsi: remove subroutine support
Brian Paul
brianp at vmware.com
Mon Oct 17 18:21:13 UTC 2016
The series looks OK to me.
Acked-by: Brian Paul <brianp at vmware.com>
I guess I have a general question though. Over the years of debugging
GL apps I've seen a few that generate pretty huge shaders (in terms of
TGSI instructions). I recall one (can't remember the app) in which a
moderate size shader compiled into several 10s of thousands of
instruction. The cause was all the function call inlining.
I'm wondering if there are cases where not inlining everything and using
actual subroutines would make sense. Has anyone ever taken a look into
that? Do any modern devices have a limit on program instructions?
-Brian
On 10/17/2016 07:39 AM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> Never used. The GLSL compiler doesn't even look at EmitNoFunctions.
> ---
> src/mesa/state_tracker/st_extensions.c | 6 +-
> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 212 +----------------------------
> 2 files changed, 7 insertions(+), 211 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
> index b87a3db..13b7ae4 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -265,24 +265,22 @@ void st_init_limits(struct pipe_screen *screen,
>
> options->EmitNoNoise = TRUE;
>
> /* TODO: make these more fine-grained if anyone needs it */
> options->MaxIfDepth =
> screen->get_shader_param(screen, sh,
> PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
> options->EmitNoLoops =
> !screen->get_shader_param(screen, sh,
> PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
> - options->EmitNoFunctions =
> - !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUBROUTINES);
> - options->EmitNoMainReturn =
> - !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUBROUTINES);
> + options->EmitNoFunctions = true;
> + options->EmitNoMainReturn = true;
>
> options->EmitNoCont =
> !screen->get_shader_param(screen, sh,
> PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED);
>
> options->EmitNoIndirectInput =
> !screen->get_shader_param(screen, sh,
> PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR);
> options->EmitNoIndirectOutput =
> !screen->get_shader_param(screen, sh,
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 2ae15c9..293654c 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -284,21 +284,20 @@ public:
> unsigned sampler_base:5;
> unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */
> unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
> glsl_base_type tex_type:4;
> unsigned tex_shadow:1;
> unsigned image_format:9;
> unsigned tex_offset_num_offset:3;
> unsigned dead_mask:4; /**< Used in dead code elimination */
> unsigned buffer_access:3; /**< buffer access type */
>
> - class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
> const struct tgsi_opcode_info *info;
> };
>
> class variable_storage : public exec_node {
> public:
> variable_storage(ir_variable *var, gl_register_file file, int index,
> unsigned array_id = 0)
> : file(file), index(index), component(0), var(var), array_id(array_id)
> {
> assert(file != PROGRAM_ARRAY || array_id != 0);
> @@ -324,52 +323,20 @@ public:
> this->size32 = size32;
> this->type = type;
> }
>
> /* doubles are stored across 2 gl_constant_values */
> gl_constant_value values[4];
> int size32; /**< Number of 32-bit components (1-4) */
> int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
> };
>
> -class function_entry : public exec_node {
> -public:
> - ir_function_signature *sig;
> -
> - /**
> - * identifier of this function signature used by the program.
> - *
> - * At the point that TGSI instructions for function calls are
> - * generated, we don't know the address of the first instruction of
> - * the function body. So we make the BranchTarget that is called a
> - * small integer and rewrite them during set_branchtargets().
> - */
> - int sig_id;
> -
> - /**
> - * Pointer to first instruction of the function body.
> - *
> - * Set during function body emits after main() is processed.
> - */
> - glsl_to_tgsi_instruction *bgn_inst;
> -
> - /**
> - * Index of the first instruction of the function body in actual TGSI.
> - *
> - * Set after conversion from glsl_to_tgsi_instruction to TGSI.
> - */
> - int inst;
> -
> - /** Storage for the return value. */
> - st_src_reg return_reg;
> -};
> -
> static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
> static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
>
> struct inout_decl {
> unsigned mesa_index;
> unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
> unsigned size;
> enum glsl_base_type base_type;
> ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */
> };
> @@ -404,22 +371,20 @@ find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id)
> struct rename_reg_pair {
> int old_reg;
> int new_reg;
> };
>
> struct glsl_to_tgsi_visitor : public ir_visitor {
> public:
> glsl_to_tgsi_visitor();
> ~glsl_to_tgsi_visitor();
>
> - function_entry *current_function;
> -
> struct gl_context *ctx;
> struct gl_program *prog;
> struct gl_shader_program *shader_program;
> struct gl_linked_shader *shader;
> struct gl_shader_compiler_options *options;
>
> int next_temp;
>
> unsigned *array_sizes;
> unsigned max_num_arrays;
> @@ -447,22 +412,20 @@ public:
> bool native_integers;
> bool have_sqrt;
> bool have_fma;
> bool use_shared_memory;
>
> variable_storage *find_variable_storage(ir_variable *var);
>
> int add_constant(gl_register_file file, gl_constant_value values[8],
> int size, int datatype, uint16_t *swizzle_out);
>
> - function_entry *get_function_signature(ir_function_signature *sig);
> -
> st_src_reg get_temp(const glsl_type *type);
> void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
>
> st_src_reg st_src_reg_for_double(double val);
> st_src_reg st_src_reg_for_float(float val);
> st_src_reg st_src_reg_for_int(int val);
> st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);
>
> /**
> * \name Visit methods
> @@ -504,24 +467,20 @@ public:
>
> st_src_reg result;
>
> /** List of variable_storage */
> exec_list variables;
>
> /** List of immediate_storage */
> exec_list immediates;
> unsigned num_immediates;
>
> - /** List of function_entry */
> - exec_list function_signatures;
> - int next_signature_id;
> -
> /** List of glsl_to_tgsi_instruction */
> exec_list instructions;
>
> glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
> st_dst_reg dst = undef_dst,
> st_src_reg src0 = undef_src,
> st_src_reg src1 = undef_src,
> st_src_reg src2 = undef_src,
> st_src_reg src3 = undef_src);
>
> @@ -712,22 +671,20 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
> inst->src[2] = src2;
> inst->src[3] = src3;
> inst->is_64bit_expanded = false;
> inst->ir = ir;
> inst->dead_mask = 0;
> /* default to float, for paths where this is not initialized
> * (since 0==UINT which is likely wrong):
> */
> inst->tex_type = GLSL_TYPE_FLOAT;
>
> - inst->function = NULL;
> -
> /* Update indirect addressing status used by TGSI */
> if (dst.reladdr || dst.reladdr2) {
> switch(dst.file) {
> case PROGRAM_STATE_VAR:
> case PROGRAM_CONSTANT:
> case PROGRAM_UNIFORM:
> this->indirect_addr_consts = true;
> break;
> case PROGRAM_IMMEDIATE:
> assert(!"immediates should not have indirect addressing");
> @@ -3181,56 +3138,20 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
> }
>
> this->result = st_src_reg(file, -1, ir->type);
> this->result.index = add_constant(file,
> values,
> ir->type->vector_elements,
> gl_type,
> &this->result.swizzle);
> }
>
> -function_entry *
> -glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
> -{
> - foreach_in_list_use_after(function_entry, entry, &this->function_signatures) {
> - if (entry->sig == sig)
> - return entry;
> - }
> -
> - entry = ralloc(mem_ctx, function_entry);
> - entry->sig = sig;
> - entry->sig_id = this->next_signature_id++;
> - entry->bgn_inst = NULL;
> -
> - /* Allocate storage for all the parameters. */
> - foreach_in_list(ir_variable, param, &sig->parameters) {
> - variable_storage *storage;
> -
> - storage = find_variable_storage(param);
> - assert(!storage);
> -
> - st_src_reg src = get_temp(param->type);
> -
> - storage = new(mem_ctx) variable_storage(param, src.file, src.index);
> - this->variables.push_tail(storage);
> - }
> -
> - if (!sig->return_type->is_void()) {
> - entry->return_reg = get_temp(sig->return_type);
> - } else {
> - entry->return_reg = undef_src;
> - }
> -
> - this->function_signatures.push_tail(entry);
> - return entry;
> -}
> -
> void
> glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
> {
> exec_node *param = ir->actual_parameters.get_head();
> ir_dereference *deref = static_cast<ir_dereference *>(param);
> ir_variable *location = deref->variable_referenced();
>
> st_src_reg buffer(
> PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);
>
> @@ -3727,30 +3648,24 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
> inst->buffer_access |= TGSI_MEMORY_COHERENT;
> if (imgvar->data.image_restrict)
> inst->buffer_access |= TGSI_MEMORY_RESTRICT;
> if (imgvar->data.image_volatile)
> inst->buffer_access |= TGSI_MEMORY_VOLATILE;
> }
>
> void
> glsl_to_tgsi_visitor::visit(ir_call *ir)
> {
> - glsl_to_tgsi_instruction *call_inst;
> ir_function_signature *sig = ir->callee;
> - function_entry *entry;
> - int i;
>
> /* Filter out intrinsics */
> switch (sig->intrinsic_id) {
> - case ir_intrinsic_invalid:
> - break;
> -
> case ir_intrinsic_atomic_counter_read:
> case ir_intrinsic_atomic_counter_increment:
> case ir_intrinsic_atomic_counter_predecrement:
> case ir_intrinsic_atomic_counter_add:
> case ir_intrinsic_atomic_counter_min:
> case ir_intrinsic_atomic_counter_max:
> case ir_intrinsic_atomic_counter_and:
> case ir_intrinsic_atomic_counter_or:
> case ir_intrinsic_atomic_counter_xor:
> case ir_intrinsic_atomic_counter_exchange:
> @@ -3801,98 +3716,34 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
> case ir_intrinsic_image_atomic_and:
> case ir_intrinsic_image_atomic_or:
> case ir_intrinsic_image_atomic_xor:
> case ir_intrinsic_image_atomic_exchange:
> case ir_intrinsic_image_atomic_comp_swap:
> case ir_intrinsic_image_size:
> case ir_intrinsic_image_samples:
> visit_image_intrinsic(ir);
> return;
>
> + case ir_intrinsic_invalid:
> case ir_intrinsic_generic_load:
> case ir_intrinsic_generic_store:
> case ir_intrinsic_generic_atomic_add:
> case ir_intrinsic_generic_atomic_and:
> case ir_intrinsic_generic_atomic_or:
> case ir_intrinsic_generic_atomic_xor:
> case ir_intrinsic_generic_atomic_min:
> case ir_intrinsic_generic_atomic_max:
> case ir_intrinsic_generic_atomic_exchange:
> case ir_intrinsic_generic_atomic_comp_swap:
> case ir_intrinsic_shader_clock:
> unreachable("Invalid intrinsic");
> }
> -
> - entry = get_function_signature(sig);
> - /* Process in parameters. */
> - foreach_two_lists(formal_node, &sig->parameters,
> - actual_node, &ir->actual_parameters) {
> - ir_rvalue *param_rval = (ir_rvalue *) actual_node;
> - ir_variable *param = (ir_variable *) formal_node;
> -
> - if (param->data.mode == ir_var_function_in ||
> - param->data.mode == ir_var_function_inout) {
> - variable_storage *storage = find_variable_storage(param);
> - assert(storage);
> -
> - param_rval->accept(this);
> - st_src_reg r = this->result;
> -
> - st_dst_reg l;
> - l.file = storage->file;
> - l.index = storage->index;
> - l.reladdr = NULL;
> - l.writemask = WRITEMASK_XYZW;
> -
> - for (i = 0; i < type_size(param->type); i++) {
> - emit_asm(ir, TGSI_OPCODE_MOV, l, r);
> - l.index++;
> - r.index++;
> - }
> - }
> - }
> -
> - /* Emit call instruction */
> - call_inst = emit_asm(ir, TGSI_OPCODE_CAL);
> - call_inst->function = entry;
> -
> - /* Process out parameters. */
> - foreach_two_lists(formal_node, &sig->parameters,
> - actual_node, &ir->actual_parameters) {
> - ir_rvalue *param_rval = (ir_rvalue *) actual_node;
> - ir_variable *param = (ir_variable *) formal_node;
> -
> - if (param->data.mode == ir_var_function_out ||
> - param->data.mode == ir_var_function_inout) {
> - variable_storage *storage = find_variable_storage(param);
> - assert(storage);
> -
> - st_src_reg r;
> - r.file = storage->file;
> - r.index = storage->index;
> - r.reladdr = NULL;
> - r.swizzle = SWIZZLE_NOOP;
> - r.negate = 0;
> -
> - param_rval->accept(this);
> - st_dst_reg l = st_dst_reg(this->result);
> -
> - for (i = 0; i < type_size(param->type); i++) {
> - emit_asm(ir, TGSI_OPCODE_MOV, l, r);
> - l.index++;
> - r.index++;
> - }
> - }
> - }
> -
> - /* Process return value. */
> - this->result = entry->return_reg;
> }
>
> void
> glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail,
> unsigned *array_elements,
> uint16_t *index,
> st_src_reg *indirect,
> unsigned *location)
> {
> switch (tail->ir_type) {
> @@ -4318,39 +4169,21 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
> }
>
> inst->tex_type = ir->type->base_type;
>
> this->result = result_src;
> }
>
> void
> glsl_to_tgsi_visitor::visit(ir_return *ir)
> {
> - if (ir->get_value()) {
> - st_dst_reg l;
> - int i;
> -
> - assert(current_function);
> -
> - ir->get_value()->accept(this);
> - st_src_reg r = this->result;
> -
> - l = st_dst_reg(current_function->return_reg);
> -
> - for (i = 0; i < type_size(current_function->sig->return_type); i++) {
> - emit_asm(ir, TGSI_OPCODE_MOV, l, r);
> - l.index++;
> - r.index++;
> - }
> - }
> -
> - emit_asm(ir, TGSI_OPCODE_RET);
> + assert(0);
> }
>
> void
> glsl_to_tgsi_visitor::visit(ir_discard *ir)
> {
> if (ir->condition) {
> ir->condition->accept(this);
> st_src_reg condition = this->result;
>
> /* Convert the bool condition to a float so we can negate. */
> @@ -4428,23 +4261,21 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
>
> result.file = PROGRAM_UNDEFINED;
> next_temp = 1;
> array_sizes = NULL;
> max_num_arrays = 0;
> next_array = 0;
> num_inputs = 0;
> num_outputs = 0;
> num_input_arrays = 0;
> num_output_arrays = 0;
> - next_signature_id = 1;
> num_immediates = 0;
> - current_function = NULL;
> num_address_regs = 0;
> samplers_used = 0;
> buffers_used = 0;
> images_used = 0;
> indirect_addr_consts = false;
> wpos_transform_const = -1;
> glsl_version = 0;
> native_integers = false;
> mem_ctx = ralloc_context(NULL);
> ctx = NULL;
> @@ -4577,25 +4408,22 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
>
> memset(outputWrites, 0, sizeof(outputWrites));
>
> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> unsigned prevWriteMask = 0;
>
> /* Give up if we encounter relative addressing or flow control. */
> if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
> inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
> tgsi_get_opcode_info(inst->op)->is_branch ||
> - inst->op == TGSI_OPCODE_BGNSUB ||
> inst->op == TGSI_OPCODE_CONT ||
> - inst->op == TGSI_OPCODE_END ||
> - inst->op == TGSI_OPCODE_ENDSUB ||
> - inst->op == TGSI_OPCODE_RET) {
> + inst->op == TGSI_OPCODE_END) {
> break;
> }
>
> if (inst->dst[0].file == PROGRAM_OUTPUT) {
> assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
> prevWriteMask = outputWrites[inst->dst[0].index];
> outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
> } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
> if (inst->dst[0].index >= tempWritesSize) {
> const int inc = 4096;
> @@ -5305,21 +5133,21 @@ struct st_translate {
> struct ureg_src shared_memory;
> unsigned *array_sizes;
> struct inout_decl *input_decls;
> unsigned num_input_decls;
> struct inout_decl *output_decls;
> unsigned num_output_decls;
>
> const GLuint *inputMapping;
> const GLuint *outputMapping;
>
> - /* For every instruction that contains a label (eg CALL), keep
> + /* For every instruction that contains a label, keep
> * details so that we can go back afterwards and emit the correct
> * tgsi instruction number for each label.
> */
> struct label *labels;
> unsigned labels_size;
> unsigned labels_count;
>
> /* Keep a record of the tgsi instruction number that each mesa
> * instruction starts at, will be used to fix up labels after
> * translation.
> @@ -5737,31 +5565,29 @@ compile_tgsi_instruction(struct st_translate *t,
> for (i = 0; i < num_dst; i++)
> dst[i] = translate_dst(t,
> &inst->dst[i],
> inst->saturate);
>
> for (i = 0; i < num_src; i++)
> src[i] = translate_src(t, &inst->src[i]);
>
> switch(inst->op) {
> case TGSI_OPCODE_BGNLOOP:
> - case TGSI_OPCODE_CAL:
> case TGSI_OPCODE_ELSE:
> case TGSI_OPCODE_ENDLOOP:
> case TGSI_OPCODE_IF:
> case TGSI_OPCODE_UIF:
> assert(num_dst == 0);
> ureg_label_insn(ureg,
> inst->op,
> src, num_src,
> - get_label(t,
> - inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
> + get_label(t, 0));
> return;
>
> case TGSI_OPCODE_TEX:
> case TGSI_OPCODE_TXB:
> case TGSI_OPCODE_TXD:
> case TGSI_OPCODE_TXL:
> case TGSI_OPCODE_TXP:
> case TGSI_OPCODE_TXQ:
> case TGSI_OPCODE_TXQS:
> case TGSI_OPCODE_TXF:
> @@ -6557,21 +6383,20 @@ out:
> * generating Mesa IR.
> */
> static struct gl_program *
> get_mesa_program_tgsi(struct gl_context *ctx,
> struct gl_shader_program *shader_program,
> struct gl_linked_shader *shader)
> {
> glsl_to_tgsi_visitor* v;
> struct gl_program *prog;
> GLenum target = _mesa_shader_stage_to_program(shader->Stage);
> - bool progress;
> struct gl_shader_compiler_options *options =
> &ctx->Const.ShaderCompilerOptions[shader->Stage];
> struct pipe_screen *pscreen = ctx->st->pipe->screen;
> enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
>
> validate_ir_tree(shader->ir);
>
> prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
> if (!prog)
> return NULL;
> @@ -6593,47 +6418,20 @@ get_mesa_program_tgsi(struct gl_context *ctx,
> _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
> _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
> prog->Parameters);
>
> /* Remove reads from output registers. */
> lower_output_reads(shader->Stage, shader->ir);
>
> /* Emit intermediate IR for main(). */
> visit_exec_list(shader->ir, v);
>
> - /* Now emit bodies for any functions that were used. */
> - do {
> - progress = GL_FALSE;
> -
> - foreach_in_list(function_entry, entry, &v->function_signatures) {
> - if (!entry->bgn_inst) {
> - v->current_function = entry;
> -
> - entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB);
> - entry->bgn_inst->function = entry;
> -
> - visit_exec_list(&entry->sig->body, v);
> -
> - glsl_to_tgsi_instruction *last;
> - last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
> - if (last->op != TGSI_OPCODE_RET)
> - v->emit_asm(NULL, TGSI_OPCODE_RET);
> -
> - glsl_to_tgsi_instruction *end;
> - end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB);
> - end->function = entry;
> -
> - progress = GL_TRUE;
> - }
> - }
> - } while (progress);
> -
> #if 0
> /* Print out some information (for debugging purposes) used by the
> * optimization passes. */
> {
> int i;
> int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
> int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
> int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
> int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
>
>
More information about the mesa-dev
mailing list