[Mesa-dev] [PATCH 3/7] glsl/lower_if: don't lower branches touching tess control outputs

Fri Nov 4 11:37:29 UTC 2016

On Thu, Nov 3, 2016 at 8:47 PM, Francisco Jerez <currojerez at riseup.net> wrote:
> Ian Romanick <idr at freedesktop.org> writes:
>
>> On 10/28/2016 04:13 PM, Marek Olšák wrote:
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> ---
>>>  src/compiler/glsl/ir_optimization.h           |  3 ++-
>>>  src/compiler/glsl/lower_if_to_cond_assign.cpp | 23 ++++++++++++++++++++---
>>>  src/compiler/glsl/test_optpass.cpp            |  2 +-
>>>  src/mesa/drivers/dri/i965/brw_link.cpp        |  2 +-
>>>  src/mesa/program/ir_to_mesa.cpp               |  3 ++-
>>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp    |  3 ++-
>>>  6 files changed, 28 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
>>> index 6f2bc32..c033f6b 100644
>>> --- a/src/compiler/glsl/ir_optimization.h
>>> +++ b/src/compiler/glsl/ir_optimization.h
>>> @@ -101,21 +101,22 @@ bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned);
>>>  bool do_dead_code_local(exec_list *instructions);
>>>  bool do_dead_code_unlinked(exec_list *instructions);
>>>  bool do_dead_functions(exec_list *instructions);
>>>  bool opt_flip_matrices(exec_list *instructions);
>>>  bool do_function_inlining(exec_list *instructions);
>>>  bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
>>>  bool do_lower_texture_projection(exec_list *instructions);
>>>  bool do_if_simplification(exec_list *instructions);
>>>  bool opt_flatten_nested_if_blocks(exec_list *instructions);
>>>  bool do_discard_simplification(exec_list *instructions);
>>> -bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0);
>>> +bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
>>> +                             unsigned max_depth = 0);
>>>  bool do_mat_op_to_vec(exec_list *instructions);
>>>  bool do_minmax_prune(exec_list *instructions);
>>>  bool do_noop_swizzle(exec_list *instructions);
>>>  bool do_structure_splitting(exec_list *instructions);
>>>  bool do_swizzle_swizzle(exec_list *instructions);
>>>  bool do_vectorize(exec_list *instructions);
>>>  bool do_tree_grafting(exec_list *instructions);
>>>  bool do_vec_index_to_cond_assign(exec_list *instructions);
>>>  bool do_vec_index_to_swizzle(exec_list *instructions);
>>>  bool lower_discard(exec_list *instructions);
>>> diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
>>> index 01a7335..a413306 100644
>>> --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
>>> +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
>>> @@ -47,56 +47,60 @@
>>>
>>>  #include "compiler/glsl_types.h"
>>>  #include "ir.h"
>>>  #include "util/set.h"
>>>  #include "util/hash_table.h" /* Needed for the hashing functions */
>>>
>>>  namespace {
>>>
>>>  class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
>>>  public:
>>> -   ir_if_to_cond_assign_visitor(unsigned max_depth)
>>> +   ir_if_to_cond_assign_visitor(gl_shader_stage stage,
>>> +                                unsigned max_depth)
>>>     {
>>>        this->progress = false;
>>> +      this->stage = stage;
>>>        this->max_depth = max_depth;
>>>        this->depth = 0;
>>>
>>>        this->condition_variables =
>>>              _mesa_set_create(NULL, _mesa_hash_pointer,
>>>                                      _mesa_key_pointer_equal);
>>>     }
>>>
>>>     ~ir_if_to_cond_assign_visitor()
>>>     {
>>>        _mesa_set_destroy(this->condition_variables, NULL);
>>>     }
>>>
>>>     ir_visitor_status visit_enter(ir_if *);
>>>     ir_visitor_status visit_leave(ir_if *);
>>>
>>>     bool found_unsupported_op;
>>>     bool progress;
>>> +   gl_shader_stage stage;
>>>     unsigned max_depth;
>>>     unsigned depth;
>>>
>>>     struct set *condition_variables;
>>>  };
>>>
>>>  } /* anonymous namespace */
>>>
>>>  bool
>>> -lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth)
>>> +lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
>>> +                        unsigned max_depth)
>>>  {
>>>     if (max_depth == UINT_MAX)
>>>        return false;
>>>
>>> -   ir_if_to_cond_assign_visitor v(max_depth);
>>> +   ir_if_to_cond_assign_visitor v(stage, max_depth);
>>>
>>>     visit_list_elements(&v, instructions);
>>>
>>>     return v.progress;
>>>  }
>>>
>>>  void
>>>  check_ir_node(ir_instruction *ir, void *data)
>>>  {
>>>     ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;
>>> @@ -105,20 +109,33 @@ check_ir_node(ir_instruction *ir, void *data)
>>>     case ir_type_call:
>>>     case ir_type_discard:
>>>     case ir_type_loop:
>>>     case ir_type_loop_jump:
>>>     case ir_type_return:
>>>     case ir_type_emit_vertex:
>>>     case ir_type_end_primitive:
>>>     case ir_type_barrier:
>>>        v->found_unsupported_op = true;
>>>        break;
>>> +
>>> +   case ir_type_dereference_variable: {
>>> +      ir_variable *var = ir->as_dereference_variable()->variable_referenced();
>>> +
>>> +      /* Tess control shader outputs are like shared memory with complex
>>> +       * side effects, so treat it that way.
>>> +       */
>>> +      if (v->stage == MESA_SHADER_TESS_CTRL &&
>>> +          var->data.mode == ir_var_shader_out)
>>> +         v->found_unsupported_op = true;
>>
>> Hmm... it seems like anything that modifies shared state (shared
>> tessellation data, shared compute, atomic, image, and SSBOs) should
>> probably disable this.  Reads should be fine.
>>
>
> Yeah, you're right, but I believe that at least atomic counters and
> images will already cause the optimization pass to bail because they can
> only be modified using GLSL IR intrinsics. Other kinds of shared data
> probably need special handling.

All drivers lower shared variables to intrinsics.

Marek