[Mesa-dev] [PATCH v2 22/25] nir/i965: use two slots from inputs_read for dvec3/dvec4 vertex input attributes

Wed Jan 4 10:14:45 UTC 2017

On Tue, 2017-01-03 at 14:41 -0800, Jason Ekstrand wrote:
> I made a few pretty trivial comments.  With those addressed,
> 
> Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
> 
> On Dec 16, 2016 8:55 AM, "Juan A. Suarez Romero" <jasuarez at igalia.com
> > wrote:
> So far, input_reads was a bitmap tracking which vertex input
> locations
> 
> were being used.
> 
> 
> 
> In OpenGL, an attribute bigger than a vec4 (like a dvec3 or dvec4)
> 
> consumes just one location, any other small attribute. So we mark the
> 
> proper bit in inputs_read, and also the same bit in
> double_inputs_read
> 
> if the attribute is a dvec3/dvec4.
> 
> 
> 
> But in Vulkan, this is slightly different: a dvec3/dvec4 attribute
> 
> consumes two locations, not just one. And hence two bits would be
> marked
> 
> in inputs_read for the same vertex input attribute.
> 
> 
> 
> To avoid handling two different situations in NIR, we just choose the
> 
> latest one: in OpenGL, when creating NIR from GLSL/IR, any
> dvec3/dvec4
> 
> vertex input attribute is marked with two bits in the inputs_read
> bitmap
> 
> (and also in the double_inputs_read), and following attributes are
> 
> adjusted accordingly.
> 
> 
> 
> As example, if in our GLSL/IR shader we have three attributes:
> 
> 
> 
> layout(location = 0) vec3  attr0;
> 
> layout(location = 1) dvec4 attr1;
> 
> layout(location = 2) dvec3 attr2;
> 
> 
> 
> then in our NIR shader we put attr0 in location 0, attr1 in locations
> 1
> 
> and 2, and attr2 in location 3.
> 
> 
> attr2 goes in locations 3 *and* 4, correct?

Yes, attr2 goes in locations 3 and 4.
> Checking carefully, basically we are using slots rather than
> locations
> 
> in NIR.
> 
> 
> 
> When emitting the vertices, we do a inverse map to know the
> 
> corresponding location for each slot.
> 
> 
> 
> v2 (Jason):
> 
> - use two slots from inputs_read for dvec3/dvec4 NIR from GLSL/IR.
> 
> ---
> 
>  src/compiler/glsl/glsl_to_nir.cpp            | 28 +++++++++++++
> 
>  src/compiler/nir/nir_gather_info.c           | 48 ++++++++++------
> -----
> 
>  src/intel/vulkan/genX_pipeline.c             | 63 +++++++++++++++++-
> ----------
> 
>  src/mesa/drivers/dri/i965/brw_draw_upload.c  | 11 +++--
> 
>  src/mesa/drivers/dri/i965/brw_fs.cpp         | 13 ------
> 
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  3 +-
> 
>  src/mesa/drivers/dri/i965/brw_nir.c          |  6 +--
> 
>  src/mesa/drivers/dri/i965/brw_nir.h          |  1 -
> 
>  src/mesa/drivers/dri/i965/brw_vec4.cpp       | 11 +++--
> 
>  9 files changed, 106 insertions(+), 78 deletions(-)
> 
> 
> 
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
> b/src/compiler/glsl/glsl_to_nir.cpp
> 
> index 4debc37..0814dad 100644
> 
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> 
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> 
> @@ -129,6 +129,19 @@ private:
> 
> 
> 
>  } /* end of anonymous namespace */
> 
> 
> 
> +static void
> 
> +nir_remap_attributes(nir_shader *shader)
> 
> +{
> 
> +   nir_foreach_variable(var, &shader->inputs) {
> 
> +      var->data.location += _mesa_bitcount_64(shader->info-
> >double_inputs_read &
> 
> +                                              BITFIELD64_MASK(var-
> >data.location));
> 
> +   }
> 
> +
> 
> +   /* Once the remap is done, reset double_inputs_read, so later it
> will have
> 
> +    * which location/slots are doubles */
> 
> +   shader->info->double_inputs_read = 0;
> 
> +}
> 
> +
> 
>  nir_shader *
> 
>  glsl_to_nir(const struct gl_shader_program *shader_prog,
> 
>              gl_shader_stage stage,
> 
> @@ -146,6 +159,13 @@ glsl_to_nir(const struct gl_shader_program
> *shader_prog,
> 
> 
> 
>     nir_lower_constant_initializers(shader, (nir_variable_mode)~0);
> 
> 
> 
> +   /* Remap the locations to slots so those requiring two slots will
> occupy
> 
> +    * two locations. For instance, if we have in the IR code a dvec3
> attr0 in
> 
> +    * location 0 and vec4 attr1 in location 1, in NIR attr0 will use
> 
> +    * locations/slots 0 and 1, and attr1 will use location/slot 2 */
> 
> +   if (shader->stage == MESA_SHADER_VERTEX)
> 
> +      nir_remap_attributes(shader);
> 
> +
> 
>     shader->info->name = ralloc_asprintf(shader, "GLSL%d",
> shader_prog->Name);
> 
>     if (shader_prog->Label)
> 
>        shader->info->label = ralloc_strdup(shader, shader_prog-
> >Label);
> 
> @@ -315,6 +335,14 @@ nir_visitor::visit(ir_variable *ir)
> 
>        } else {
> 
>           var->data.mode = nir_var_shader_in;
> 
>        }
> 
> +
> 
> +      /* Mark all the locations that require two slots */
> 
> +      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
> 
> +         for (uint i = 0; i < glsl_count_attribute_slots(var->type,
> true); i++) {
> 
> +            uint64_t bitfield = BITFIELD64_BIT(var->data.location +
> i);
> 
> +            shader->info->double_inputs_read |= bitfield;
> 
> +         }
> 
> +      }
> 
>        break;
> 
> 
> 
>     case ir_var_shader_out:
> 
> diff --git a/src/compiler/nir/nir_gather_info.c
> b/src/compiler/nir/nir_gather_info.c
> 
> index 07c9949..35a1ce4 100644
> 
> --- a/src/compiler/nir/nir_gather_info.c
> 
> +++ b/src/compiler/nir/nir_gather_info.c
> 
> @@ -53,11 +53,6 @@ set_io_mask(nir_shader *shader, nir_variable *var,
> int offset, int len)
> 
>           else
> 
>              shader->info->inputs_read |= bitfield;
> 
> 
> 
> -         /* double inputs read is only for vertex inputs */
> 
> -         if (shader->stage == MESA_SHADER_VERTEX &&
> 
> -             glsl_type_is_dual_slot(glsl_without_array(var->type)))
> 
> -            shader->info->double_inputs_read |= bitfield;
> 
> -
> 
>           if (shader->stage == MESA_SHADER_FRAGMENT) {
> 
>              shader->info->fs.uses_sample_qualifier |= var-
> >data.sample;
> 
>           }
> 
> @@ -83,26 +78,21 @@ static void
> 
>  mark_whole_variable(nir_shader *shader, nir_variable *var)
> 
>  {
> 
>     const struct glsl_type *type = var->type;
> 
> -   bool is_vertex_input = false;
> 
> 
> 
>     if (nir_is_per_vertex_io(var, shader->stage)) {
> 
>        assert(glsl_type_is_array(type));
> 
>        type = glsl_get_array_element(type);
> 
>     }
> 
> 
> 
> -   if (shader->stage == MESA_SHADER_VERTEX &&
> 
> -       var->data.mode == nir_var_shader_in)
> 
> -      is_vertex_input = true;
> 
> -
> 
>     const unsigned slots =
> 
>        var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
> 
> -                        : glsl_count_attribute_slots(type,
> is_vertex_input);
> 
> +                        : glsl_count_attribute_slots(type, false);
> 
> 
> 
>     set_io_mask(shader, var, 0, slots);
> 
>  }
> 
> 
> 
>  static unsigned
> 
> -get_io_offset(nir_deref_var *deref, bool is_vertex_input)
> 
> +get_io_offset(nir_deref_var *deref)
> 
>  {
> 
>     unsigned offset = 0;
> 
> 
> 
> @@ -117,7 +107,7 @@ get_io_offset(nir_deref_var *deref, bool
> is_vertex_input)
> 
>              return -1;
> 
>           }
> 
> 
> 
> -         offset += glsl_count_attribute_slots(tail->type,
> is_vertex_input) *
> 
> +         offset += glsl_count_attribute_slots(tail->type, false) *
> 
>              deref_array->base_offset;
> 
>        }
> 
>        /* TODO: we can get the offset for structs here see
> nir_lower_io() */
> 
> @@ -163,12 +153,7 @@ try_mask_partial_io(nir_shader *shader,
> nir_deref_var *deref)
> 
>        return false;
> 
>     }
> 
> 
> 
> -   bool is_vertex_input = false;
> 
> -   if (shader->stage == MESA_SHADER_VERTEX &&
> 
> -       var->data.mode == nir_var_shader_in)
> 
> -      is_vertex_input = true;
> 
> -
> 
> -   unsigned offset = get_io_offset(deref, is_vertex_input);
> 
> +   unsigned offset = get_io_offset(deref);
> 
>     if (offset == -1)
> 
>        return false;
> 
> 
> 
> @@ -184,8 +169,7 @@ try_mask_partial_io(nir_shader *shader,
> nir_deref_var *deref)
> 
>     }
> 
> 
> 
>     /* double element width for double types that takes two slots */
> 
> -   if (!is_vertex_input &&
> 
> -       glsl_type_is_dual_slot(glsl_without_array(type))) {
> 
> +   if (glsl_type_is_dual_slot(glsl_without_array(type))) {
> 
>        elem_width *= 2;
> 
>     }
> 
> 
> 
> @@ -220,13 +204,27 @@ gather_intrinsic_info(nir_intrinsic_instr
> *instr, nir_shader *shader)
> 
>     case nir_intrinsic_interp_var_at_sample:
> 
>     case nir_intrinsic_interp_var_at_offset:
> 
>     case nir_intrinsic_load_var:
> 
> -   case nir_intrinsic_store_var:
> 
> -      if (instr->variables[0]->var->data.mode == nir_var_shader_in
> ||
> 
> -          instr->variables[0]->var->data.mode == nir_var_shader_out)
> {
> 
> +   case nir_intrinsic_store_var: {
> 
> +      nir_variable *var = instr->variables[0]->var;
> 
> +
> 
> +      if (var->data.mode == nir_var_shader_in ||
> 
> +          var->data.mode == nir_var_shader_out) {
> 
>           if (!try_mask_partial_io(shader, instr->variables[0]))
> 
> -            mark_whole_variable(shader, instr->variables[0]->var);
> 
> +            mark_whole_variable(shader, var);
> 
> +
> 
> +         /* We need to track which input_reads bits correspond to a
> 
> +          * dvec3/dvec4 input attribute */
> 
> +         if (shader->stage == MESA_SHADER_VERTEX &&
> 
> +             var->data.mode == nir_var_shader_in &&
> 
> +             glsl_type_is_dual_slot(glsl_without_array(var->type)))
> {
> 
> +            for (uint i = 0; i < glsl_count_attribute_slots(var-
> >type, false); i++) {
> 
> +               int idx = var->data.location + i;
> 
> +               shader->info->double_inputs_read |=
> BITFIELD64_BIT(idx);
> 
> +            }
> 
> +         }
> 
>        }
> 
>        break;
> 
> +   }
> 
> 
> 
>     case nir_intrinsic_load_draw_id:
> 
>     case nir_intrinsic_load_front_face:
> 
> diff --git a/src/intel/vulkan/genX_pipeline.c
> b/src/intel/vulkan/genX_pipeline.c
> 
> index 845d020..7b94959 100644
> 
> --- a/src/intel/vulkan/genX_pipeline.c
> 
> +++ b/src/intel/vulkan/genX_pipeline.c
> 
> @@ -33,26 +33,33 @@
> 
>  static uint32_t
> 
>  vertex_element_comp_control(enum isl_format format, unsigned comp)
> 
>  {
> 
> -   uint8_t bits;
> 
>     switch (comp) {
> 
> -   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
> 
> -   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
> 
> -   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
> 
> -   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
> 
> -   default: unreachable("Invalid component");
> 
> -   }
> 
> -
> 
> -   if (bits) {
> 
> -      return VFCOMP_STORE_SRC;
> 
> -   } else if (comp < 3) {
> 
> -      return VFCOMP_STORE_0;
> 
> -   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT
> ||
> 
> -            isl_format_layouts[format].channels.r.type == ISL_SINT)
> {
> 
> -      assert(comp == 3);
> 
> -      return VFCOMP_STORE_1_INT;
> 
> -   } else {
> 
> -      assert(comp == 3);
> 
> -      return VFCOMP_STORE_1_FP;
> 
> +   case 0:
> 
> +      return isl_format_layouts[format].channels.r.bits ?
> 
> +         VFCOMP_STORE_SRC : VFCOMP_STORE_0;
> 
> +   case 1:
> 
> +      return isl_format_layouts[format].channels.g.bits ?
> 
> +         VFCOMP_STORE_SRC : VFCOMP_STORE_0;
> 
> +   case 2:
> 
> +      return isl_format_layouts[format].channels.b.bits ?
> 
> +         VFCOMP_STORE_SRC :
> ((isl_format_layouts[format].channels.r.type == ISL_RAW) ?
> 
> +                             VFCOMP_NOSTORE : VFCOMP_STORE_0);
> 
> 
> Given all the line wrapping, I think it would be clearer to just have
> an if ladder here
> 
> 
> +   case 3:
> 
> +      if (isl_format_layouts[format].channels.a.bits)
> 
> +         return VFCOMP_STORE_SRC;
> 
> +      else
> 
> 
> Please use braces when one side of the if/else is multiple lines.
> 
> 
> +         switch (isl_format_layouts[format].channels.r.type) {
> 
> +         case ISL_RAW:
> 
> +            return isl_format_layouts[format].channels.b.bits ?
> 
> +               VFCOMP_STORE_0 : VFCOMP_NOSTORE;
> 
> 
> This seems a bit odd.  Mind explaining what's going on here?
> 
> 
> +         case ISL_UINT:
> 
> +         case ISL_SINT:
> 
> +            return VFCOMP_STORE_1_INT;
> 
> +         default:
> 
> +            return VFCOMP_STORE_1_FP;
> 
> +         }
> 
> +   default:
> 
> +      unreachable("Invalid component");
> 
>     }
> 
>  }
> 
> 
> 
> @@ -64,8 +71,10 @@ emit_vertex_input(struct anv_pipeline *pipeline,
> 
> 
> 
>     /* Pull inputs_read out of the VS prog data */
> 
>     const uint64_t inputs_read = vs_prog_data->inputs_read;
> 
> +   const uint64_t double_inputs_read = vs_prog_data-
> >double_inputs_read;
> 
>     assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
> 
>     const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
> 
> +   const uint32_t elements_double = double_inputs_read >>
> VERT_ATTRIB_GENERIC0;
> 
> 
> 
>  #if GEN_GEN >= 8
> 
>     /* On BDW+, we only need to allocate space for base ids.  Setting
> up
> 
> @@ -83,13 +92,16 @@ emit_vertex_input(struct anv_pipeline *pipeline,
> 
>                                  vs_prog_data->uses_baseinstance;
> 
>  #endif
> 
> 
> 
> -   uint32_t elem_count = __builtin_popcount(elements) +
> needs_svgs_elem;
> 
> -   if (elem_count == 0)
> 
> +   uint32_t elem_count = __builtin_popcount(elements) -
> 
> +      DIV_ROUND_UP(__builtin_popcount(elements_double), 2);
> 
> 
> This had better be divisible by 2...
> 
> 
> +
> 
> +   uint32_t total_elems = elem_count + needs_svgs_elem;
> 
> +   if (total_elems == 0)
> 
>        return;
> 
> 
> 
>     uint32_t *p;
> 
> 
> 
> -   const uint32_t num_dwords = 1 + elem_count * 2;
> 
> +   const uint32_t num_dwords = 1 + total_elems * 2;
> 
>     p = anv_batch_emitn(&pipeline->batch, num_dwords,
> 
>                         GENX(3DSTATE_VERTEX_ELEMENTS));
> 
>     memset(p + 1, 0, (num_dwords - 1) * 4);
> 
> @@ -107,7 +119,10 @@ emit_vertex_input(struct anv_pipeline *pipeline,
> 
>        if ((elements & (1 << desc->location)) == 0)
> 
>           continue; /* Binding unused */
> 
> 
> 
> -      uint32_t slot = __builtin_popcount(elements & ((1 << desc-
> >location) - 1));
> 
> +      uint32_t slot =
> 
> +         __builtin_popcount(elements & ((1 << desc->location) - 1))
> -
> 
> +         DIV_ROUND_UP(__builtin_popcount(elements_double &
> 
> +                                        ((1 << desc->location) -1)),
> 2);
> 
> 
> 
>        struct GENX(VERTEX_ELEMENT_STATE) element = {
> 
>           .VertexBufferIndex = desc->binding,
> 
> @@ -137,7 +152,7 @@ emit_vertex_input(struct anv_pipeline *pipeline,
> 
>  #endif
> 
>     }
> 
> 
> 
> -   const uint32_t id_slot = __builtin_popcount(elements);
> 
> +   const uint32_t id_slot = elem_count;
> 
>     if (needs_svgs_elem) {
> 
>        /* From the Broadwell PRM for the 3D_Vertex_Component_Control
> enum:
> 
>         *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> 
> index b138cb7..c3655d8 100644
> 
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> 
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> 
> @@ -472,11 +472,16 @@ brw_prepare_vertices(struct brw_context *brw)
> 
>     /* Accumulate the list of enabled arrays. */
> 
>     brw->vb.nr_enabled = 0;
> 
>     while (vs_inputs) {
> 
> -      GLuint index = ffsll(vs_inputs) - 1;
> 
> +      GLuint first = ffsll(vs_inputs) - 1;
> 
> +      GLuint index =
> 
> +         first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data-
> >double_inputs_read &
> 
> +                                               
> BITFIELD64_MASK(first)), 2);
> 
>        struct brw_vertex_element *input = &brw->vb.inputs[index];
> 
>        input->is_dual_slot = brw->gen >= 8 &&
> 
> -         (vs_prog_data->double_inputs_read & BITFIELD64_BIT(index))
> != 0;
> 
> -      vs_inputs &= ~BITFIELD64_BIT(index);
> 
> +         (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first))
> != 0;
> 
> +      vs_inputs &= ~BITFIELD64_BIT(first);
> 
> +      if (input->is_dual_slot)
> 
> +         vs_inputs &= ~BITFIELD64_BIT(first + 1);
> 
>        brw->vb.enabled[brw->vb.nr_enabled++] = input;
> 
>     }
> 
> 
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> 
> index c218f56..b383d98 100644
> 
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> 
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> 
> @@ -492,19 +492,6 @@ type_size_scalar(const struct glsl_type *type)
> 
>     return 0;
> 
>  }
> 
> 
> 
> -/* Attribute arrays are loaded as one vec4 per element (or matrix
> column),
> 
> - * except for double-precision types, which are loaded as one dvec4.
> 
> - */
> 
> -extern "C" int
> 
> -type_size_vs_input(const struct glsl_type *type)
> 
> -{
> 
> -   if (type->is_double()) {
> 
> -      return type_size_dvec4(type);
> 
> -   } else {
> 
> -      return type_size_vec4(type);
> 
> -   }
> 
> -}
> 
> -
> 
>  /**
> 
>   * Create a MOV to read the timestamp register.
> 
>   *
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> 
> index 14415bd..5c356d6 100644
> 
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> 
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> 
> @@ -36,8 +36,7 @@ fs_reg *
> 
>  fs_visitor::emit_vs_system_value(int location)
> 
>  {
> 
>     fs_reg *reg = new(this->mem_ctx)
> 
> -      fs_reg(ATTR, 4 * (_mesa_bitcount_64(nir->info->inputs_read) +
> 
> -                        _mesa_bitcount_64(nir->info-
> >double_inputs_read)),
> 
> +      fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info->inputs_read),
> 
>               BRW_REGISTER_TYPE_D);
> 
>     struct brw_vs_prog_data *vs_prog_data =
> brw_vs_prog_data(prog_data);
> 
> 
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> b/src/mesa/drivers/dri/i965/brw_nir.c
> 
> index 763e3ec..24334c8 100644
> 
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> 
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> 
> @@ -113,9 +113,7 @@ remap_vs_attrs(nir_block *block, shader_info
> *nir_info)
> 
>           int attr = intrin->const_index[0];
> 
>           int slot = _mesa_bitcount_64(nir_info->inputs_read &
> 
>                                        BITFIELD64_MASK(attr));
> 
> -         int dslot = _mesa_bitcount_64(nir_info->double_inputs_read
> &
> 
> -                                       BITFIELD64_MASK(attr));
> 
> -         intrin->const_index[0] = 4 * (slot + dslot);
> 
> +         intrin->const_index[0] = 4 * slot;
> 
>        }
> 
>     }
> 
>     return true;
> 
> @@ -204,7 +202,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
> 
>      * loaded as one vec4 or dvec4 per element (or matrix column),
> depending on
> 
>      * whether it is a double-precision type or not.
> 
>      */
> 
> -   nir_lower_io(nir, nir_var_shader_in, type_size_vs_input, 0);
> 
> +   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
> 
> 
> 
>     /* This pass needs actual constants */
> 
>     nir_opt_constant_folding(nir);
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h
> b/src/mesa/drivers/dri/i965/brw_nir.h
> 
> index 3c774d0..04c7ef8 100644
> 
> --- a/src/mesa/drivers/dri/i965/brw_nir.h
> 
> +++ b/src/mesa/drivers/dri/i965/brw_nir.h
> 
> @@ -34,7 +34,6 @@ extern "C" {
> 
>  int type_size_scalar(const struct glsl_type *type);
> 
>  int type_size_vec4(const struct glsl_type *type);
> 
>  int type_size_dvec4(const struct glsl_type *type);
> 
> -int type_size_vs_input(const struct glsl_type *type);
> 
> 
> 
>  static inline int
> 
>  type_size_scalar_bytes(const struct glsl_type *type)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> 
> index b9e592f..852e88f 100644
> 
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> 
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> 
> @@ -2127,7 +2127,7 @@ brw_compile_vs(const struct brw_compiler
> *compiler, void *log_data,
> 
>        ((1 << shader->info->cull_distance_array_size) - 1) <<
> 
>        shader->info->clip_distance_array_size;
> 
> 
> 
> -   unsigned nr_attributes = _mesa_bitcount_64(prog_data-
> >inputs_read);
> 
> +   unsigned nr_attribute_slots = _mesa_bitcount_64(prog_data-
> >inputs_read);
> 
> 
> 
>     /* gl_VertexID and gl_InstanceID are system values, but arrive
> via an
> 
>      * incoming vertex attribute.  So, add an extra slot.
> 
> @@ -2137,18 +2137,17 @@ brw_compile_vs(const struct brw_compiler
> *compiler, void *log_data,
> 
>          BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
> 
>          BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
> 
>          BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
> 
> -      nr_attributes++;
> 
> +      nr_attribute_slots++;
> 
>     }
> 
> 
> 
>     /* gl_DrawID has its very own vec4 */
> 
>     if (shader->info->system_values_read &
> 
>         BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
> 
> -      nr_attributes++;
> 
> +      nr_attribute_slots++;
> 
>     }
> 
> 
> 
> -   unsigned nr_attribute_slots =
> 
> -      nr_attributes +
> 
> -      _mesa_bitcount_64(shader->info->double_inputs_read);
> 
> +   unsigned nr_attributes = nr_attribute_slots -
> 
> +      DIV_ROUND_UP(_mesa_bitcount_64(shader->info-
> >double_inputs_read), 2);
> 
> 
> 
>     /* The 3DSTATE_VS documentation lists the lower bound on "Vertex
> URB Entry
> 
>      * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. 
> Empirically, in
> 
> --
> 
> 2.9.3
> 
> 
> 
> _______________________________________________
> 
> mesa-dev mailing list
> 
> mesa-dev at lists.freedesktop.org
> 
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 
> 
> 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170104/4d967ca4/attachment-0001.html>