[Mesa-dev] [PATCH 099/133] nir: Vectorize intrinsics

Jason Ekstrand jason at jlekstrand.net
Mon Jan 5 22:05:20 PST 2015


On Sun, Jan 4, 2015 at 8:56 PM, Connor Abbott <cwabbott0 at gmail.com> wrote:

> Reviewed-by: Connor Abbott <cwabbott0 at gmail.com>
>
> Nice to see that this idea worked out well!
>
> On Tue, Dec 16, 2014 at 1:11 AM, Jason Ekstrand <jason at jlekstrand.net>
> wrote:
>
>> We used to have the number of components built into the intrinsic.  This
>> meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4
>> variants.  This led to piles of switch statements to generate the correct
>> texture names, and introspection to figure out the number of components.
>>
>
>
> This doesn't touch textures, I think you can just delete "texture" and
> it'll make more sense.
>

Yeah, I think s/texture/intrinsic/ would be the thing to do


>
>
>> We can make things much nicer by allowing "vectorized" intrinsics.
>> ---
>>  src/glsl/nir/glsl_to_nir.cpp             |  60 ++++------------
>>  src/glsl/nir/nir.h                       |  15 +++-
>>  src/glsl/nir/nir_intrinsics.h            |  79 +++++++--------------
>>  src/glsl/nir/nir_lower_io.c              | 115
>> +++++++------------------------
>>  src/glsl/nir/nir_lower_locals_to_regs.c  |  18 ++---
>>  src/glsl/nir/nir_lower_system_values.c   |   3 +-
>>  src/glsl/nir/nir_lower_variables.c       |  74 +++++++-------------
>>  src/glsl/nir/nir_validate.c              |  10 +--
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  64 +++++------------
>>  9 files changed, 123 insertions(+), 315 deletions(-)
>>
>> diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
>> index f85b50e..088a8e9 100644
>> --- a/src/glsl/nir/glsl_to_nir.cpp
>> +++ b/src/glsl/nir/glsl_to_nir.cpp
>> @@ -629,7 +629,8 @@ nir_visitor::visit(ir_call *ir)
>>        nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
>>
>>        nir_intrinsic_instr *store_instr =
>> -         nir_intrinsic_instr_create(shader,
>> nir_intrinsic_store_var_vec1);
>> +         nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
>> +      store_instr->num_components = 1;
>>
>>        ir->return_deref->accept(this);
>>        store_instr->variables[0] = this->deref_head;
>> @@ -704,17 +705,9 @@ nir_visitor::visit(ir_assignment *ir)
>>         * back into the LHS. Copy propagation should get rid of the mess.
>>         */
>>
>> -      nir_intrinsic_op load_op;
>> -      switch (ir->lhs->type->vector_elements) {
>> -         case 1: load_op = nir_intrinsic_load_var_vec1; break;
>> -         case 2: load_op = nir_intrinsic_load_var_vec2; break;
>> -         case 3: load_op = nir_intrinsic_load_var_vec3; break;
>> -         case 4: load_op = nir_intrinsic_load_var_vec4; break;
>> -         default: unreachable("Invalid number of components"); break;
>> -      }
>> -
>> -      nir_intrinsic_instr *load =
>> nir_intrinsic_instr_create(this->shader,
>> -                                                             load_op);
>> +      nir_intrinsic_instr *load =
>> +         nir_intrinsic_instr_create(this->shader,
>> nir_intrinsic_load_var);
>> +      load->num_components = ir->lhs->type->vector_elements;
>>        load->dest.is_ssa = true;
>>        nir_ssa_def_init(&load->instr, &load->dest.ssa,
>>                         num_components, NULL);
>> @@ -759,17 +752,9 @@ nir_visitor::visit(ir_assignment *ir)
>>        src.ssa = &vec->dest.dest.ssa;
>>     }
>>
>> -   nir_intrinsic_op store_op;
>> -   switch (ir->lhs->type->vector_elements) {
>> -      case 1: store_op = nir_intrinsic_store_var_vec1; break;
>> -      case 2: store_op = nir_intrinsic_store_var_vec2; break;
>> -      case 3: store_op = nir_intrinsic_store_var_vec3; break;
>> -      case 4: store_op = nir_intrinsic_store_var_vec4; break;
>> -      default: unreachable("Invalid number of components"); break;
>> -   }
>> -
>> -   nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader,
>> -                                                           store_op);
>> +   nir_intrinsic_instr *store =
>> +      nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
>> +   store->num_components = ir->lhs->type->vector_elements;
>>     nir_deref *store_deref = nir_copy_deref(this->shader,
>> &lhs_deref->deref);
>>     store->variables[0] = nir_deref_as_var(store_deref);
>>     store->src[0] = src;
>> @@ -848,17 +833,9 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
>>         * must emit a variable load.
>>         */
>>
>> -      nir_intrinsic_op load_op;
>> -      switch (ir->type->vector_elements) {
>> -      case 1: load_op = nir_intrinsic_load_var_vec1; break;
>> -      case 2: load_op = nir_intrinsic_load_var_vec2; break;
>> -      case 3: load_op = nir_intrinsic_load_var_vec3; break;
>> -      case 4: load_op = nir_intrinsic_load_var_vec4; break;
>> -      default: unreachable("Invalid number of components");
>> -      }
>> -
>>        nir_intrinsic_instr *load_instr =
>> -         nir_intrinsic_instr_create(this->shader, load_op);
>> +         nir_intrinsic_instr_create(this->shader,
>> nir_intrinsic_load_var);
>> +      load_instr->num_components = ir->type->vector_elements;
>>        load_instr->variables[0] = this->deref_head;
>>        add_instr(&load_instr->instr, ir->type->vector_elements);
>>     }
>> @@ -917,23 +894,12 @@ nir_visitor::visit(ir_expression *ir)
>>
>>        nir_intrinsic_op op;
>>        if (const_index) {
>> -         switch (ir->type->vector_elements) {
>> -            case 1: op = nir_intrinsic_load_ubo_vec1; break;
>> -            case 2: op = nir_intrinsic_load_ubo_vec2; break;
>> -            case 3: op = nir_intrinsic_load_ubo_vec3; break;
>> -            case 4: op = nir_intrinsic_load_ubo_vec4; break;
>> -            default: assert(0); break;
>> -         }
>> +         op = nir_intrinsic_load_ubo;
>>        } else {
>> -         switch (ir->type->vector_elements) {
>> -            case 1: op = nir_intrinsic_load_ubo_vec1_indirect; break;
>> -            case 2: op = nir_intrinsic_load_ubo_vec2_indirect; break;
>> -            case 3: op = nir_intrinsic_load_ubo_vec3_indirect; break;
>> -            case 4: op = nir_intrinsic_load_ubo_vec4_indirect; break;
>> -            default: assert(0); break;
>> -         }
>> +         op = nir_intrinsic_load_ubo_indirect;
>>        }
>>        nir_intrinsic_instr *load =
>> nir_intrinsic_instr_create(this->shader, op);
>> +      load->num_components = ir->type->vector_elements;
>>        load->const_index[0] = ir->operands[0]->as_constant()->value.u[0];
>>        load->const_index[1] = const_index ? const_index->value.u[0] : 0;
>> /* base offset */
>>        load->const_index[2] = 1; /* number of vec4's */
>> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> index 30146d6..412ceea 100644
>> --- a/src/glsl/nir/nir.h
>> +++ b/src/glsl/nir/nir.h
>> @@ -693,6 +693,9 @@ typedef struct {
>>
>>     nir_dest dest;
>>
>> +   /** number of components if this is a vectorized intrinsic */
>> +   uint8_t num_components;
>> +
>>     int const_index[3];
>>
>>     nir_deref_var *variables[2];
>> @@ -732,12 +735,20 @@ typedef struct {
>>
>>     unsigned num_srcs; /** < number of register/SSA inputs */
>>
>> -   /** number of components of each input register */
>> +   /** number of components of each input register
>> +    *
>> +    * If this value is 0, the number of components is given by the
>> +    * num_components field of nir_intrinsic_instr.
>> +    */
>>     unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
>>
>>     bool has_dest;
>>
>> -   /** number of components of each output register */
>> +   /** number of components of the output register
>> +    *
>> +    * If this value is 0, the number of components is given by the
>> +    * num_components field of nir_intrinsic_instr.
>> +    */
>>     unsigned dest_components;
>>
>>     /** the number of inputs/outputs that are variables */
>> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
>> index e4ad8cd..75bd12f 100644
>> --- a/src/glsl/nir/nir_intrinsics.h
>> +++ b/src/glsl/nir/nir_intrinsics.h
>> @@ -42,19 +42,9 @@
>>  #define ARR(...) { __VA_ARGS__ }
>>
>>
>> -INTRINSIC(load_var_vec1,   0, ARR(), true, 1, 1, 0,
>> -          NIR_INTRINSIC_CAN_ELIMINATE)
>> -INTRINSIC(load_var_vec2,   0, ARR(), true, 2, 1, 0,
>> -          NIR_INTRINSIC_CAN_ELIMINATE)
>> -INTRINSIC(load_var_vec3,   0, ARR(), true, 3, 1, 0,
>> -          NIR_INTRINSIC_CAN_ELIMINATE)
>> -INTRINSIC(load_var_vec4,   0, ARR(), true, 4, 1, 0,
>> -          NIR_INTRINSIC_CAN_ELIMINATE)
>> -INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0)
>> -INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0)
>> -INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0)
>> -INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0)
>> -INTRINSIC(copy_var,       0, ARR(),  false, 0, 2, 0, 0)
>> +INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
>> +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
>> +INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
>>
>>  /*
>>   * a barrier is an intrinsic with no inputs/outputs but which can't be
>> moved
>> @@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2)
>>  SYSTEM_VALUE(sample_mask_in, 1)
>>  SYSTEM_VALUE(invocation_id, 1)
>>
>> -#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \
>> -   INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \
>> -             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
>> -   INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \
>> -             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
>> -   INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \
>> -             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
>> -   INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \
>> -             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
>> -   INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps),
>> true, 1, \
>> -             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
>> -   INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps),
>> true, 2, \
>> -             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
>> -   INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps),
>> true, 3, \
>> -             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
>> -   INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps),
>> true, 4, \
>> -             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags)
>> -
>> -#define LOAD(name, num_indices, flags) \
>> -   LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags)
>> -
>>  /*
>>   * The first index is the address to load from, and the second index is
>> the
>>   * number of array elements to load. For UBO's (and SSBO's), the first
>> index
>> @@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1)
>>   * elements begin immediately after the previous array element.
>>   */
>>
>> +#define LOAD(name, num_indices, flags) \
>> +   INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
>> +             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
>> +   INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices,
>> \
>> +             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
>> +
>>  LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
>>  LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
>>  LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
>> @@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
>>   * interp_at_offset* intrinsics take a second source that is either a
>>   * sample id or a vec2 position offset.
>>   */
>> -#define INTERP(name, flags) \
>> -   LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags)
>> -
>> -#define INTERP_WITH_ARG(name, src_comps, flags) \
>> -   LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags)
>>
>> -INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER)
>> -INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER)
>> -INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
>> +#define INTERP(name, num_srcs, src_comps) \
>> +   INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \
>> +             0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE |
>> NIR_INTRINSIC_CAN_REORDER) \
>> +   INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps),
>> true, \
>> +             0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE |
>> NIR_INTRINSIC_CAN_REORDER)
>>
>> -#define STORE(name, num_indices, flags) \
>> -   INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices,
>> flags) \
>> -   INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices,
>> flags) \
>> -   INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices,
>> flags) \
>> -   INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices,
>> flags) \
>> -   INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \
>> -             num_indices, flags) \
>> -   INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \
>> -             num_indices, flags) \
>> -   INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \
>> -             num_indices, flags) \
>> -   INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \
>> -             num_indices, flags) \
>> +INTERP(at_centroid, 0, 0)
>> +INTERP(at_sample, 1, 1)
>> +INTERP(at_offset, 1, 1)
>>
>>  /*
>>   * Stores work the same way as loads, except now the first register
>> input is
>> @@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1,
>> NIR_INTRINSIC_CAN_REORDER)
>>   * offset.
>>   */
>>
>> +#define STORE(name, num_indices, flags) \
>> +   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
>> +   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
>> +             num_indices, flags) \
>> +
>>  STORE(output, 2, 0)
>>  /* STORE(ssbo, 3, 0) */
>>
>> -LAST_INTRINSIC(store_output_vec4_indirect)
>> +LAST_INTRINSIC(store_output_indirect)
>> diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
>> index a3b8186..ed3ce81 100644
>> --- a/src/glsl/nir/nir_lower_io.c
>> +++ b/src/glsl/nir/nir_lower_io.c
>> @@ -186,66 +186,6 @@ get_io_offset(nir_deref_var *deref, nir_instr
>> *instr, nir_src *indirect,
>>     return base_offset;
>>  }
>>
>> -static nir_intrinsic_op
>> -get_load_op(nir_variable_mode mode, bool indirect, unsigned
>> num_components)
>> -{
>> -   if (indirect) {
>> -      switch (mode) {
>> -      case nir_var_shader_in:
>> -         switch (num_components) {
>> -         case 1: return nir_intrinsic_load_input_vec1_indirect;
>> -         case 2: return nir_intrinsic_load_input_vec2_indirect;
>> -         case 3: return nir_intrinsic_load_input_vec3_indirect;
>> -         case 4: return nir_intrinsic_load_input_vec4_indirect;
>> -         default: unreachable("Invalid number of components"); break;
>> -         }
>> -         break;
>> -
>> -      case nir_var_uniform:
>> -         switch (num_components) {
>> -         case 1: return nir_intrinsic_load_uniform_vec1_indirect;
>> -         case 2: return nir_intrinsic_load_uniform_vec2_indirect;
>> -         case 3: return nir_intrinsic_load_uniform_vec3_indirect;
>> -         case 4: return nir_intrinsic_load_uniform_vec4_indirect;
>> -         default: unreachable("Invalid number of components"); break;
>> -         }
>> -         break;
>> -
>> -      default:
>> -         unreachable("Invalid input type");
>> -         break;
>> -      }
>> -   } else {
>> -      switch (mode) {
>> -      case nir_var_shader_in:
>> -         switch (num_components) {
>> -         case 1: return nir_intrinsic_load_input_vec1;
>> -         case 2: return nir_intrinsic_load_input_vec2;
>> -         case 3: return nir_intrinsic_load_input_vec3;
>> -         case 4: return nir_intrinsic_load_input_vec4;
>> -         default: unreachable("Invalid number of components"); break;
>> -         }
>> -         break;
>> -
>> -      case nir_var_uniform:
>> -         switch (num_components) {
>> -         case 1: return nir_intrinsic_load_uniform_vec1;
>> -         case 2: return nir_intrinsic_load_uniform_vec2;
>> -         case 3: return nir_intrinsic_load_uniform_vec3;
>> -         case 4: return nir_intrinsic_load_uniform_vec4;
>> -         default: unreachable("Invalid number of components"); break;
>> -         }
>> -         break;
>> -
>> -      default:
>> -         unreachable("Invalid input type");
>> -         break;
>> -      }
>> -   }
>> -
>> -   return nir_intrinsic_load_input_vec1;
>> -}
>> -
>>  static bool
>>  nir_lower_io_block(nir_block *block, void *void_state)
>>  {
>> @@ -258,22 +198,35 @@ nir_lower_io_block(nir_block *block, void
>> *void_state)
>>        nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>>
>>        switch (intrin->intrinsic) {
>> -      case nir_intrinsic_load_var_vec1:
>> -      case nir_intrinsic_load_var_vec2:
>> -      case nir_intrinsic_load_var_vec3:
>> -      case nir_intrinsic_load_var_vec4: {
>> +      case nir_intrinsic_load_var: {
>>           nir_variable_mode mode = intrin->variables[0]->var->data.mode;
>>           if (mode != nir_var_shader_in && mode != nir_var_uniform)
>>              continue;
>>
>>           bool has_indirect = deref_has_indirect(intrin->variables[0]);
>> -         unsigned num_components =
>> -            nir_intrinsic_infos[intrin->intrinsic].dest_components;
>>
>> -         nir_intrinsic_op load_op = get_load_op(mode, has_indirect,
>> -                                                num_components);
>> +         nir_intrinsic_op load_op;
>> +         switch (mode) {
>> +         case nir_var_shader_in:
>> +            if (has_indirect) {
>> +               load_op = nir_intrinsic_load_input_indirect;
>> +            } else {
>> +               load_op = nir_intrinsic_load_input;
>> +            }
>> +            break;
>> +         case nir_var_uniform:
>> +            if (has_indirect) {
>> +               load_op = nir_intrinsic_load_uniform_indirect;
>> +            } else {
>> +               load_op = nir_intrinsic_load_uniform;
>> +            }
>> +            break;
>> +         default:
>> +            unreachable("Unknown variable mode");
>> +         }
>>           nir_intrinsic_instr *load =
>> nir_intrinsic_instr_create(state->mem_ctx,
>>                                                                  load_op);
>> +         load->num_components = intrin->num_components;
>>
>>           nir_src indirect;
>>           unsigned offset = get_io_offset(intrin->variables[0],
>> @@ -289,7 +242,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
>>           if (intrin->dest.is_ssa) {
>>              load->dest.is_ssa = true;
>>              nir_ssa_def_init(&load->instr, &load->dest.ssa,
>> -                             num_components, NULL);
>> +                             intrin->num_components, NULL);
>>
>>              nir_src new_src = {
>>                 .is_ssa = true,
>> @@ -307,38 +260,22 @@ nir_lower_io_block(nir_block *block, void
>> *void_state)
>>           break;
>>        }
>>
>> -      case nir_intrinsic_store_var_vec1:
>> -      case nir_intrinsic_store_var_vec2:
>> -      case nir_intrinsic_store_var_vec3:
>> -      case nir_intrinsic_store_var_vec4: {
>> +      case nir_intrinsic_store_var: {
>>           if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
>>              continue;
>>
>>           bool has_indirect = deref_has_indirect(intrin->variables[0]);
>> -         unsigned num_components =
>> -            nir_intrinsic_infos[intrin->intrinsic].src_components[0];
>>
>>           nir_intrinsic_op store_op;
>>           if (has_indirect) {
>> -            switch (num_components) {
>> -            case 1: store_op = nir_intrinsic_store_output_vec1_indirect;
>> break;
>> -            case 2: store_op = nir_intrinsic_store_output_vec2_indirect;
>> break;
>> -            case 3: store_op = nir_intrinsic_store_output_vec3_indirect;
>> break;
>> -            case 4: store_op = nir_intrinsic_store_output_vec4_indirect;
>> break;
>> -            default: unreachable("Invalid number of components"); break;
>> -            }
>> +            store_op = nir_intrinsic_store_output_indirect;
>>           } else {
>> -            switch (num_components) {
>> -            case 1: store_op = nir_intrinsic_store_output_vec1; break;
>> -            case 2: store_op = nir_intrinsic_store_output_vec2; break;
>> -            case 3: store_op = nir_intrinsic_store_output_vec3; break;
>> -            case 4: store_op = nir_intrinsic_store_output_vec4; break;
>> -            default: unreachable("Invalid number of components"); break;
>> -            }
>> +            store_op = nir_intrinsic_store_output;
>>           }
>>
>>           nir_intrinsic_instr *store =
>> nir_intrinsic_instr_create(state->mem_ctx,
>>
>> store_op);
>> +         store->num_components = intrin->num_components;
>>
>>           nir_src indirect;
>>           unsigned offset = get_io_offset(intrin->variables[0],
>> diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c
>> b/src/glsl/nir/nir_lower_locals_to_regs.c
>> index caf1c29..081ed6b 100644
>> --- a/src/glsl/nir/nir_lower_locals_to_regs.c
>> +++ b/src/glsl/nir/nir_lower_locals_to_regs.c
>> @@ -219,22 +219,18 @@ lower_locals_to_regs_block(nir_block *block, void
>> *void_state)
>>        nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>>
>>        switch (intrin->intrinsic) {
>> -      case nir_intrinsic_load_var_vec1:
>> -      case nir_intrinsic_load_var_vec2:
>> -      case nir_intrinsic_load_var_vec3:
>> -      case nir_intrinsic_load_var_vec4: {
>> +      case nir_intrinsic_load_var: {
>>           if (intrin->variables[0]->var->data.mode != nir_var_local)
>>              continue;
>>
>>           nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
>> nir_op_imov);
>>           mov->src[0].src = get_deref_reg_src(intrin->variables[0],
>>                                               &intrin->instr, state);
>> -         unsigned num_components =
>> mov->src[0].src.reg.reg->num_components;
>> -         mov->dest.write_mask = (1 << num_components) - 1;
>> +         mov->dest.write_mask = (1 << intrin->num_components) - 1;
>>           if (intrin->dest.is_ssa) {
>>              mov->dest.dest.is_ssa = true;
>>              nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
>> -                             num_components, NULL);
>> +                             intrin->num_components, NULL);
>>
>>              nir_src new_src = {
>>                 .is_ssa = true,
>> @@ -252,20 +248,16 @@ lower_locals_to_regs_block(nir_block *block, void
>> *void_state)
>>           break;
>>        }
>>
>> -      case nir_intrinsic_store_var_vec1:
>> -      case nir_intrinsic_store_var_vec2:
>> -      case nir_intrinsic_store_var_vec3:
>> -      case nir_intrinsic_store_var_vec4: {
>> +      case nir_intrinsic_store_var: {
>>           if (intrin->variables[0]->var->data.mode != nir_var_local)
>>              continue;
>>
>>           nir_src reg_src = get_deref_reg_src(intrin->variables[0],
>>                                               &intrin->instr, state);
>> -         unsigned num_components = reg_src.reg.reg->num_components;
>>
>>           nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
>> nir_op_imov);
>>           mov->src[0].src = nir_src_copy(intrin->src[0], state->mem_ctx);
>> -         mov->dest.write_mask = (1 << num_components) - 1;
>> +         mov->dest.write_mask = (1 << intrin->num_components) - 1;
>>           mov->dest.dest.is_ssa = false;
>>           mov->dest.dest.reg.reg = reg_src.reg.reg;
>>           mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
>> diff --git a/src/glsl/nir/nir_lower_system_values.c
>> b/src/glsl/nir/nir_lower_system_values.c
>> index cbd1dac..e700df4 100644
>> --- a/src/glsl/nir/nir_lower_system_values.c
>> +++ b/src/glsl/nir/nir_lower_system_values.c
>> @@ -30,8 +30,7 @@
>>  static void
>>  convert_instr(nir_intrinsic_instr *instr)
>>  {
>> -   if (instr->intrinsic != nir_intrinsic_load_var_vec1 &&
>> -       instr->intrinsic != nir_intrinsic_load_var_vec2)
>> +   if (instr->intrinsic != nir_intrinsic_load_var)
>>        return;
>>
>>     nir_variable *var = instr->variables[0]->var;
>> diff --git a/src/glsl/nir/nir_lower_variables.c
>> b/src/glsl/nir/nir_lower_variables.c
>> index 052b021..dab3639 100644
>> --- a/src/glsl/nir/nir_lower_variables.c
>> +++ b/src/glsl/nir/nir_lower_variables.c
>> @@ -449,17 +449,11 @@ fill_deref_tables_block(nir_block *block, void
>> *void_state)
>>        nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>>
>>        switch (intrin->intrinsic) {
>> -      case nir_intrinsic_load_var_vec1:
>> -      case nir_intrinsic_load_var_vec2:
>> -      case nir_intrinsic_load_var_vec3:
>> -      case nir_intrinsic_load_var_vec4:
>> +      case nir_intrinsic_load_var:
>>           register_load_instr(intrin, true, state);
>>           break;
>>
>> -      case nir_intrinsic_store_var_vec1:
>> -      case nir_intrinsic_store_var_vec2:
>> -      case nir_intrinsic_store_var_vec3:
>> -      case nir_intrinsic_store_var_vec4:
>> +      case nir_intrinsic_store_var:
>>           register_store_instr(intrin, true, state);
>>           break;
>>
>> @@ -541,17 +535,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
>>        nir_deref *src_deref = nir_copy_deref(state->mem_ctx,
>> &src_head->deref);
>>        nir_deref *dest_deref = nir_copy_deref(state->mem_ctx,
>> &dest_head->deref);
>>
>> -      nir_intrinsic_op load_op;
>> -      switch (num_components) {
>> -         case 1: load_op = nir_intrinsic_load_var_vec1; break;
>> -         case 2: load_op = nir_intrinsic_load_var_vec2; break;
>> -         case 3: load_op = nir_intrinsic_load_var_vec3; break;
>> -         case 4: load_op = nir_intrinsic_load_var_vec4; break;
>> -         default: unreachable("Invalid number of components"); break;
>> -      }
>> -
>> -      nir_intrinsic_instr *load =
>> nir_intrinsic_instr_create(state->mem_ctx,
>> -                                                             load_op);
>> +      nir_intrinsic_instr *load =
>> +         nir_intrinsic_instr_create(state->mem_ctx,
>> nir_intrinsic_load_var);
>> +      load->num_components = num_components;
>>        load->variables[0] = nir_deref_as_var(src_deref);
>>        load->dest.is_ssa = true;
>>        nir_ssa_def_init(&load->instr, &load->dest.ssa, num_components,
>> NULL);
>> @@ -559,17 +545,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
>>        nir_instr_insert_before(&copy_instr->instr, &load->instr);
>>        register_load_instr(load, false, state);
>>
>> -      nir_intrinsic_op store_op;
>> -      switch (num_components) {
>> -         case 1: store_op = nir_intrinsic_store_var_vec1; break;
>> -         case 2: store_op = nir_intrinsic_store_var_vec2; break;
>> -         case 3: store_op = nir_intrinsic_store_var_vec3; break;
>> -         case 4: store_op = nir_intrinsic_store_var_vec4; break;
>> -         default: unreachable("Invalid number of components"); break;
>> -      }
>> -
>> -      nir_intrinsic_instr *store =
>> nir_intrinsic_instr_create(state->mem_ctx,
>> -                                                              store_op);
>> +      nir_intrinsic_instr *store =
>> +         nir_intrinsic_instr_create(state->mem_ctx,
>> nir_intrinsic_store_var);
>> +      store->num_components = num_components;
>>        store->variables[0] = nir_deref_as_var(dest_deref);
>>        store->src[0].is_ssa = true;
>>        store->src[0].ssa = &load->dest.ssa;
>> @@ -782,14 +760,9 @@ lower_deref_to_ssa_block(nir_block *block, void
>> *void_state)
>>           nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>>
>>           switch (intrin->intrinsic) {
>> -         case nir_intrinsic_load_var_vec1:
>> -         case nir_intrinsic_load_var_vec2:
>> -         case nir_intrinsic_load_var_vec3:
>> -         case nir_intrinsic_load_var_vec4: {
>> +         case nir_intrinsic_load_var: {
>>              struct deref_node *node =
>> get_deref_node(intrin->variables[0],
>>                                                       false, state);
>> -            unsigned num_chans =
>> -               nir_intrinsic_infos[intrin->intrinsic].dest_components;
>>
>>              if (node == NULL) {
>>                 /* If we hit this path then we are referencing an invalid
>> @@ -799,7 +772,8 @@ lower_deref_to_ssa_block(nir_block *block, void
>> *void_state)
>>                  */
>>                 nir_ssa_undef_instr *undef =
>>                    nir_ssa_undef_instr_create(state->mem_ctx);
>> -               nir_ssa_def_init(&undef->instr, &undef->def, num_chans,
>> NULL);
>> +               nir_ssa_def_init(&undef->instr, &undef->def,
>> +                                intrin->num_components, NULL);
>>
>>                 nir_instr_insert_before(&intrin->instr, &undef->instr);
>>                 nir_instr_remove(&intrin->instr);
>> @@ -821,14 +795,15 @@ lower_deref_to_ssa_block(nir_block *block, void
>> *void_state)
>>                                                        nir_op_imov);
>>              mov->src[0].src.is_ssa = true;
>>              mov->src[0].src.ssa = get_ssa_def_for_block(node, block,
>> state);
>> -            for (unsigned i = num_chans; i < 4; i++)
>> +            for (unsigned i = intrin->num_components; i < 4; i++)
>>                 mov->src[0].swizzle[i] = 0;
>>
>>              assert(intrin->dest.is_ssa);
>>
>> -            mov->dest.write_mask = (1 << num_chans) - 1;
>> +            mov->dest.write_mask = (1 << intrin->num_components) - 1;
>>              mov->dest.dest.is_ssa = true;
>> -            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
>> num_chans, NULL);
>> +            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
>> +                             intrin->num_components, NULL);
>>
>>              nir_instr_insert_before(&intrin->instr, &mov->instr);
>>              nir_instr_remove(&intrin->instr);
>> @@ -843,10 +818,7 @@ lower_deref_to_ssa_block(nir_block *block, void
>> *void_state)
>>              break;
>>           }
>>
>> -         case nir_intrinsic_store_var_vec1:
>> -         case nir_intrinsic_store_var_vec2:
>> -         case nir_intrinsic_store_var_vec3:
>> -         case nir_intrinsic_store_var_vec4: {
>> +         case nir_intrinsic_store_var: {
>>              struct deref_node *node =
>> get_deref_node(intrin->variables[0],
>>                                                       false, state);
>>
>> @@ -860,7 +832,8 @@ lower_deref_to_ssa_block(nir_block *block, void
>> *void_state)
>>              if (!node->lower_to_ssa)
>>                 continue;
>>
>> -            unsigned num_chans = glsl_get_vector_elements(node->type);
>> +            assert(intrin->num_components ==
>> +                   glsl_get_vector_elements(node->type));
>>
>>              assert(intrin->src[0].is_ssa);
>>
>> @@ -873,12 +846,12 @@ lower_deref_to_ssa_block(nir_block *block, void
>> *void_state)
>>
>>                 mov->src[1].src.is_ssa = true;
>>                 mov->src[1].src.ssa = intrin->src[0].ssa;
>> -               for (unsigned i = num_chans; i < 4; i++)
>> +               for (unsigned i = intrin->num_components; i < 4; i++)
>>                    mov->src[1].swizzle[i] = 0;
>>
>>                 mov->src[2].src.is_ssa = true;
>>                 mov->src[2].src.ssa = get_ssa_def_for_block(node, block,
>> state);
>> -               for (unsigned i = num_chans; i < 4; i++)
>> +               for (unsigned i = intrin->num_components; i < 4; i++)
>>                    mov->src[2].swizzle[i] = 0;
>>
>>              } else {
>> @@ -886,13 +859,14 @@ lower_deref_to_ssa_block(nir_block *block, void
>> *void_state)
>>
>>                 mov->src[0].src.is_ssa = true;
>>                 mov->src[0].src.ssa = intrin->src[0].ssa;
>> -               for (unsigned i = num_chans; i < 4; i++)
>> +               for (unsigned i = intrin->num_components; i < 4; i++)
>>                    mov->src[0].swizzle[i] = 0;
>>              }
>>
>> -            mov->dest.write_mask = (1 << num_chans) - 1;
>> +            mov->dest.write_mask = (1 << intrin->num_components) - 1;
>>              mov->dest.dest.is_ssa = true;
>> -            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);
>> +            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
>> +                             intrin->num_components, NULL);
>>
>>              nir_instr_insert_before(&intrin->instr, &mov->instr);
>>              nir_instr_remove(&intrin->instr);
>> diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
>> index b8ef802..ee29fc3 100644
>> --- a/src/glsl/nir/nir_validate.c
>> +++ b/src/glsl/nir/nir_validate.c
>> @@ -338,16 +338,10 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
>>     }
>>
>>     switch (instr->intrinsic) {
>> -   case nir_intrinsic_load_var_vec1:
>> -   case nir_intrinsic_load_var_vec2:
>> -   case nir_intrinsic_load_var_vec3:
>> -   case nir_intrinsic_load_var_vec4:
>> +   case nir_intrinsic_load_var:
>>        assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
>>        break;
>> -   case nir_intrinsic_store_var_vec1:
>> -   case nir_intrinsic_store_var_vec2:
>> -   case nir_intrinsic_store_var_vec3:
>> -   case nir_intrinsic_store_var_vec4:
>> +   case nir_intrinsic_store_var:
>>        assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
>>               instr->variables[0]->var->data.mode != nir_var_uniform);
>>        break;
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index dbb2470..4c1805d 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -1312,14 +1312,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        break;
>>     }
>>
>> -   case nir_intrinsic_load_uniform_vec1:
>> -   case nir_intrinsic_load_uniform_vec2:
>> -   case nir_intrinsic_load_uniform_vec3:
>> -   case nir_intrinsic_load_uniform_vec4: {
>> +   case nir_intrinsic_load_uniform: {
>>        unsigned index = 0;
>>        for (int i = 0; i < instr->const_index[1]; i++) {
>> -         for (unsigned j = 0;
>> -            j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
>> +         for (unsigned j = 0; j < instr->num_components; j++) {
>>              fs_reg src = nir_uniforms;
>>              src.reg_offset = instr->const_index[0] + index;
>>              src.type = dest.type;
>> @@ -1335,14 +1331,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        break;
>>     }
>>
>> -   case nir_intrinsic_load_uniform_vec1_indirect:
>> -   case nir_intrinsic_load_uniform_vec2_indirect:
>> -   case nir_intrinsic_load_uniform_vec3_indirect:
>> -   case nir_intrinsic_load_uniform_vec4_indirect: {
>> +   case nir_intrinsic_load_uniform_indirect: {
>>        unsigned index = 0;
>>        for (int i = 0; i < instr->const_index[1]; i++) {
>> -         for (unsigned j = 0;
>> -            j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
>> +         for (unsigned j = 0; j < instr->num_components; j++) {
>>              fs_reg src = nir_uniforms;
>>              src.reg_offset = instr->const_index[0] + index;
>>              src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
>> @@ -1360,10 +1352,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        break;
>>     }
>>
>> -   case nir_intrinsic_load_ubo_vec1:
>> -   case nir_intrinsic_load_ubo_vec2:
>> -   case nir_intrinsic_load_ubo_vec3:
>> -   case nir_intrinsic_load_ubo_vec4: {
>> +   case nir_intrinsic_load_ubo: {
>>        fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
>>                                   (unsigned) instr->const_index[0]);
>>        fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
>> @@ -1373,8 +1362,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
>>                                  packed_consts, surf_index, const_offset_reg));
>>
>> -      for (unsigned i = 0;
>> -           i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
>> +      for (unsigned i = 0; i < instr->num_components; i++) {
>>           packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
>>
>>           /* The std140 packing rules don't allow vectors to cross 16-byte
>> @@ -1392,10 +1380,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        break;
>>     }
>>
>> -   case nir_intrinsic_load_ubo_vec1_indirect:
>> -   case nir_intrinsic_load_ubo_vec2_indirect:
>> -   case nir_intrinsic_load_ubo_vec3_indirect:
>> -   case nir_intrinsic_load_ubo_vec4_indirect: {
>> +   case nir_intrinsic_load_ubo_indirect: {
>>        fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
>>                                   instr->const_index[0]);
>>        /* Turn the byte offset into a dword offset. */
>> @@ -1404,8 +1389,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
>>                 fs_reg(2)));
>>
>> -      for (unsigned i = 0;
>> -           i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
>> +      for (unsigned i = 0; i < instr->num_components; i++) {
>>           exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
>>                                                       offset, base_offset + i);
>>           fs_inst *last_inst = (fs_inst *) list.get_tail();
>> @@ -1418,14 +1402,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        break;
>>     }
>>
>> -   case nir_intrinsic_load_input_vec1:
>> -   case nir_intrinsic_load_input_vec2:
>> -   case nir_intrinsic_load_input_vec3:
>> -   case nir_intrinsic_load_input_vec4: {
>> +   case nir_intrinsic_load_input: {
>>        unsigned index = 0;
>>        for (int i = 0; i < instr->const_index[1]; i++) {
>> -         for (unsigned j = 0;
>> -            j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
>> +         for (unsigned j = 0; j < instr->num_components; j++) {
>>              fs_reg src = nir_inputs;
>>              src.reg_offset = instr->const_index[0] + index;
>>              src.type = dest.type;
>> @@ -1441,14 +1421,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        break;
>>     }
>>
>> -   case nir_intrinsic_load_input_vec1_indirect:
>> -   case nir_intrinsic_load_input_vec2_indirect:
>> -   case nir_intrinsic_load_input_vec3_indirect:
>> -   case nir_intrinsic_load_input_vec4_indirect: {
>> +   case nir_intrinsic_load_input_indirect: {
>>        unsigned index = 0;
>>        for (int i = 0; i < instr->const_index[1]; i++) {
>> -         for (unsigned j = 0;
>> -            j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
>> +         for (unsigned j = 0; j < instr->num_components; j++) {
>>              fs_reg src = nir_inputs;
>>              src.reg_offset = instr->const_index[0] + index;
>>              src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
>> @@ -1466,15 +1442,11 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        break;
>>     }
>>
>> -   case nir_intrinsic_store_output_vec1:
>> -   case nir_intrinsic_store_output_vec2:
>> -   case nir_intrinsic_store_output_vec3:
>> -   case nir_intrinsic_store_output_vec4: {
>> +   case nir_intrinsic_store_output: {
>>        fs_reg src = get_nir_src(instr->src[0]);
>>        unsigned index = 0;
>>        for (int i = 0; i < instr->const_index[1]; i++) {
>> -         for (unsigned j = 0;
>> -            j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
>> +         for (unsigned j = 0; j < instr->num_components; j++) {
>>              fs_reg new_dest = nir_outputs;
>>              new_dest.reg_offset = instr->const_index[0] + index;
>>              new_dest.type = src.type;
>> @@ -1489,16 +1461,12 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>        break;
>>     }
>>
>> -   case nir_intrinsic_store_output_vec1_indirect:
>> -   case nir_intrinsic_store_output_vec2_indirect:
>> -   case nir_intrinsic_store_output_vec3_indirect:
>> -   case nir_intrinsic_store_output_vec4_indirect: {
>> +   case nir_intrinsic_store_output_indirect: {
>>        fs_reg src = get_nir_src(instr->src[0]);
>>        fs_reg indirect = get_nir_src(instr->src[1]);
>>        unsigned index = 0;
>>        for (int i = 0; i < instr->const_index[1]; i++) {
>> -         for (unsigned j = 0;
>> -            j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
>> +         for (unsigned j = 0; j < instr->num_components; j++) {
>>              fs_reg new_dest = nir_outputs;
>>              new_dest.reg_offset = instr->const_index[0] + index;
>>              new_dest.reladdr = new(mem_ctx) fs_reg(indirect);
>> --
>> 2.2.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150105/558b4c27/attachment-0001.html>


More information about the mesa-dev mailing list