[Mesa-dev] [PATCH 3/7] glsl/lower_if: don't lower branches touching tess control outputs
Francisco Jerez
currojerez at riseup.net
Thu Nov 3 19:47:53 UTC 2016
Ian Romanick <idr at freedesktop.org> writes:
> On 10/28/2016 04:13 PM, Marek Olšák wrote:
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> ---
>> src/compiler/glsl/ir_optimization.h | 3 ++-
>> src/compiler/glsl/lower_if_to_cond_assign.cpp | 23 ++++++++++++++++++++---
>> src/compiler/glsl/test_optpass.cpp | 2 +-
>> src/mesa/drivers/dri/i965/brw_link.cpp | 2 +-
>> src/mesa/program/ir_to_mesa.cpp | 3 ++-
>> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
>> 6 files changed, 28 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
>> index 6f2bc32..c033f6b 100644
>> --- a/src/compiler/glsl/ir_optimization.h
>> +++ b/src/compiler/glsl/ir_optimization.h
>> @@ -101,21 +101,22 @@ bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned);
>> bool do_dead_code_local(exec_list *instructions);
>> bool do_dead_code_unlinked(exec_list *instructions);
>> bool do_dead_functions(exec_list *instructions);
>> bool opt_flip_matrices(exec_list *instructions);
>> bool do_function_inlining(exec_list *instructions);
>> bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
>> bool do_lower_texture_projection(exec_list *instructions);
>> bool do_if_simplification(exec_list *instructions);
>> bool opt_flatten_nested_if_blocks(exec_list *instructions);
>> bool do_discard_simplification(exec_list *instructions);
>> -bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0);
>> +bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
>> + unsigned max_depth = 0);
>> bool do_mat_op_to_vec(exec_list *instructions);
>> bool do_minmax_prune(exec_list *instructions);
>> bool do_noop_swizzle(exec_list *instructions);
>> bool do_structure_splitting(exec_list *instructions);
>> bool do_swizzle_swizzle(exec_list *instructions);
>> bool do_vectorize(exec_list *instructions);
>> bool do_tree_grafting(exec_list *instructions);
>> bool do_vec_index_to_cond_assign(exec_list *instructions);
>> bool do_vec_index_to_swizzle(exec_list *instructions);
>> bool lower_discard(exec_list *instructions);
>> diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
>> index 01a7335..a413306 100644
>> --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
>> +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
>> @@ -47,56 +47,60 @@
>>
>> #include "compiler/glsl_types.h"
>> #include "ir.h"
>> #include "util/set.h"
>> #include "util/hash_table.h" /* Needed for the hashing functions */
>>
>> namespace {
>>
>> class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
>> public:
>> - ir_if_to_cond_assign_visitor(unsigned max_depth)
>> + ir_if_to_cond_assign_visitor(gl_shader_stage stage,
>> + unsigned max_depth)
>> {
>> this->progress = false;
>> + this->stage = stage;
>> this->max_depth = max_depth;
>> this->depth = 0;
>>
>> this->condition_variables =
>> _mesa_set_create(NULL, _mesa_hash_pointer,
>> _mesa_key_pointer_equal);
>> }
>>
>> ~ir_if_to_cond_assign_visitor()
>> {
>> _mesa_set_destroy(this->condition_variables, NULL);
>> }
>>
>> ir_visitor_status visit_enter(ir_if *);
>> ir_visitor_status visit_leave(ir_if *);
>>
>> bool found_unsupported_op;
>> bool progress;
>> + gl_shader_stage stage;
>> unsigned max_depth;
>> unsigned depth;
>>
>> struct set *condition_variables;
>> };
>>
>> } /* anonymous namespace */
>>
>> bool
>> -lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth)
>> +lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
>> + unsigned max_depth)
>> {
>> if (max_depth == UINT_MAX)
>> return false;
>>
>> - ir_if_to_cond_assign_visitor v(max_depth);
>> + ir_if_to_cond_assign_visitor v(stage, max_depth);
>>
>> visit_list_elements(&v, instructions);
>>
>> return v.progress;
>> }
>>
>> void
>> check_ir_node(ir_instruction *ir, void *data)
>> {
>> ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;
>> @@ -105,20 +109,33 @@ check_ir_node(ir_instruction *ir, void *data)
>> case ir_type_call:
>> case ir_type_discard:
>> case ir_type_loop:
>> case ir_type_loop_jump:
>> case ir_type_return:
>> case ir_type_emit_vertex:
>> case ir_type_end_primitive:
>> case ir_type_barrier:
>> v->found_unsupported_op = true;
>> break;
>> +
>> + case ir_type_dereference_variable: {
>> + ir_variable *var = ir->as_dereference_variable()->variable_referenced();
>> +
>> + /* Tess control shader outputs are like shared memory with complex
>> + * side effects, so treat it that way.
>> + */
>> + if (v->stage == MESA_SHADER_TESS_CTRL &&
>> + var->data.mode == ir_var_shader_out)
>> + v->found_unsupported_op = true;
>
> Hmm... it seems like anything that modifies shared static (shared
> tessellation data, shared compute, atomic, image, and SSBOs) should
> probably disable this. Reads should be fine.
>
Yeah, you're right, but I believe that at least atomic counters and
images will already cause the optimization pass to bail because they can
only be modified using GLSL IR intrinsics, other kinds of shared data
probably need special handling.
> Usually Curro has better informed opinions about such things.
>
>> + break;
>> + }
>> +
>> default:
>> break;
>> }
>> }
>>
>> void
>> move_block_to_cond_assign(void *mem_ctx,
>> ir_if *if_ir, ir_rvalue *cond_expr,
>> exec_list *instructions,
>> struct set *set)
>> diff --git a/src/compiler/glsl/test_optpass.cpp b/src/compiler/glsl/test_optpass.cpp
>> index 852af19..4d0bcc2 100644
>> --- a/src/compiler/glsl/test_optpass.cpp
>> +++ b/src/compiler/glsl/test_optpass.cpp
>> @@ -92,21 +92,21 @@ do_optimization(struct exec_list *ir, const char *optimization,
>> "do_lower_jumps ( %d , %d , %d , %d , %d ) ",
>> &int_0, &int_1, &int_2, &int_3, &int_4) == 5) {
>> return do_lower_jumps(ir, int_0 != 0, int_1 != 0, int_2 != 0,
>> int_3 != 0, int_4 != 0);
>> } else if (strcmp(optimization, "do_lower_texture_projection") == 0) {
>> return do_lower_texture_projection(ir);
>> } else if (strcmp(optimization, "do_if_simplification") == 0) {
>> return do_if_simplification(ir);
>> } else if (sscanf(optimization, "lower_if_to_cond_assign ( %d ) ",
>> &int_0) == 1) {
>> - return lower_if_to_cond_assign(ir, int_0);
>> + return lower_if_to_cond_assign(MESA_SHADER_VERTEX, ir, int_0);
>> } else if (strcmp(optimization, "do_mat_op_to_vec") == 0) {
>> return do_mat_op_to_vec(ir);
>> } else if (strcmp(optimization, "do_noop_swizzle") == 0) {
>> return do_noop_swizzle(ir);
>> } else if (strcmp(optimization, "do_structure_splitting") == 0) {
>> return do_structure_splitting(ir);
>> } else if (strcmp(optimization, "do_swizzle_swizzle") == 0) {
>> return do_swizzle_swizzle(ir);
>> } else if (strcmp(optimization, "do_tree_grafting") == 0) {
>> return do_tree_grafting(ir);
>> diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
>> index f75b384..dc38dad 100644
>> --- a/src/mesa/drivers/dri/i965/brw_link.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_link.cpp
>> @@ -118,21 +118,21 @@ process_glsl_ir(struct brw_context *brw,
>> INSERT_TO_SHIFTS |
>> REVERSE_TO_SHIFTS;
>> }
>>
>> lower_instructions(shader->ir, instructions_to_lower);
>>
>> /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
>> * if-statements need to be flattened.
>> */
>> if (brw->gen < 6)
>> - lower_if_to_cond_assign(shader->ir, 16);
>> + lower_if_to_cond_assign(shader->Stage, shader->ir, 16);
>>
>> do_lower_texture_projection(shader->ir);
>> brw_lower_texture_gradients(brw, shader->ir);
>> do_vec_index_to_cond_assign(shader->ir);
>> lower_vector_insert(shader->ir, true);
>> lower_offset_arrays(shader->ir);
>> lower_noise(shader->ir);
>> lower_quadop_vector(shader->ir, false);
>>
>> do_copy_propagation(shader->ir);
>> diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
>> index 5776d15..1d20e0c 100644
>> --- a/src/mesa/program/ir_to_mesa.cpp
>> +++ b/src/mesa/program/ir_to_mesa.cpp
>> @@ -2985,21 +2985,22 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
>>
>> progress = do_common_optimization(ir, true, true,
>> options, ctx->Const.NativeIntegers)
>> || progress;
>>
>> progress = lower_quadop_vector(ir, true) || progress;
>>
>> if (options->MaxIfDepth == 0)
>> progress = lower_discard(ir) || progress;
>>
>> - progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
>> + progress = lower_if_to_cond_assign((gl_shader_stage)i, ir,
>> + options->MaxIfDepth) || progress;
>>
>> progress = lower_noise(ir) || progress;
>>
>> /* If there are forms of indirect addressing that the driver
>> * cannot handle, perform the lowering pass.
>> */
>> if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
>> || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
>> progress =
>> lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index 5b53c40..90e9e88 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -6855,21 +6855,22 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
>>
>> do {
>> progress = false;
>>
>> progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
>>
>> progress = do_common_optimization(ir, true, true, options,
>> ctx->Const.NativeIntegers)
>> || progress;
>>
>> - progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
>> + progress = lower_if_to_cond_assign((gl_shader_stage)i, ir,
>> + options->MaxIfDepth) || progress;
>>
>> } while (progress);
>>
>> validate_ir_tree(ir);
>> }
>>
>> build_program_resource_list(ctx, prog);
>>
>> for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
>> struct gl_program *linked_prog;
>>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161103/956f5fb5/attachment-0001.sig>
More information about the mesa-dev
mailing list