[Mesa-dev] [PATCH 19/22] anv/nir: add support for dvec3/4 consuming two locations

Jason Ekstrand jason at jlekstrand.net
Fri Dec 2 19:49:46 UTC 2016


On Thu, Dec 1, 2016 at 10:22 PM, Jason Ekstrand <jason at jlekstrand.net>
wrote:

> +Ken
>
> On Thu, Dec 1, 2016 at 10:17 PM, Jason Ekstrand <jason at jlekstrand.net>
> wrote:
>
>> I'm not sure how I feel about this one.  It seems like it would almost be
>> easier to just pick one convention or the other for NIR and adjust one of
>> the drivers accordingly.  I don't know that I have a huge preference which
>> convention we choose.  I guess the Vulkan convention matches our hardware a
>> bit better.  In either case, converting from one to the other should be a
>> simple matter of building a remap table or a creative use of popcount.
>>
>
As another data point, TGSI uses 2 slots for dvec3/4 inputs.  I think the
simplest and most consistent thing to do is make NIR use 2 slots and just
change the GL driver to work that way.  That way we can keep GL's oddity as
contained as possible.


> On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero <
>> jasuarez at igalia.com> wrote:
>>
>>> One difference between OpenGL and Vulkan regarding 64-bit vertex
>>> attribute types is that dvec3 and dvec4 consumes just one location in
>>> OpenGL, while in Vulkan it consumes two locations.
>>>
>>> Thus, in OpenGL for each dvec3/dvec4 vertex attrib we mark just one bit
>>> in our internal inputs_read bitmap (and also the corresponding bit in
>>> double_inputs_read bitmap) while in Vulkan we mark two consecutive bits
>>> in both bitmaps.
>>>
>>> This is handled with a nir option called "dvec3_consumes_two_locations",
>>> which is set to True for Vulkan code. And all the computation regarding
>>> emitting vertices as well as the mapping between attributes and physical
>>> registers use this option to correctly do the work.
>>> ---
>>>  src/amd/vulkan/radv_pipeline.c               |  1 +
>>>  src/compiler/nir/nir.h                       |  5 +++
>>>  src/compiler/nir/nir_gather_info.c           |  6 +--
>>>  src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  1 +
>>>  src/intel/vulkan/anv_device.c                |  2 +-
>>>  src/intel/vulkan/genX_pipeline.c             | 62
>>> +++++++++++++++++-----------
>>>  src/mesa/drivers/dri/i965/brw_compiler.c     | 23 ++++++++++-
>>>  src/mesa/drivers/dri/i965/brw_compiler.h     |  2 +-
>>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 14 +++++--
>>>  src/mesa/drivers/dri/i965/brw_nir.c          | 18 +++++---
>>>  src/mesa/drivers/dri/i965/brw_vec4.cpp       | 13 ++++--
>>>  src/mesa/drivers/dri/i965/intel_screen.c     |  3 +-
>>>  12 files changed, 105 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/src/amd/vulkan/radv_pipeline.c
>>> b/src/amd/vulkan/radv_pipeline.c
>>> index ee5d812..90d4650 100644
>>> --- a/src/amd/vulkan/radv_pipeline.c
>>> +++ b/src/amd/vulkan/radv_pipeline.c
>>> @@ -59,6 +59,7 @@ static const struct nir_shader_compiler_options
>>> nir_options = {
>>>         .lower_unpack_unorm_4x8 = true,
>>>         .lower_extract_byte = true,
>>>         .lower_extract_word = true,
>>> +       .dvec3_consumes_two_locations = true,
>>>  };
>>>
>>>  VkResult radv_CreateShaderModule(
>>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>>> index 1679d89..0fc8f39 100644
>>> --- a/src/compiler/nir/nir.h
>>> +++ b/src/compiler/nir/nir.h
>>> @@ -1794,6 +1794,11 @@ typedef struct nir_shader_compiler_options {
>>>      * information must be inferred from the list of input nir_variables.
>>>      */
>>>     bool use_interpolated_input_intrinsics;
>>> +
>>> +   /**
>>> +    * In Vulkan, a dvec3/dvec4 consumes two locations instead just one.
>>> +    */
>>> +   bool dvec3_consumes_two_locations;
>>>  } nir_shader_compiler_options;
>>>
>>>  typedef struct nir_shader {
>>> diff --git a/src/compiler/nir/nir_gather_info.c
>>> b/src/compiler/nir/nir_gather_info.c
>>> index 07c9949..8c80671 100644
>>> --- a/src/compiler/nir/nir_gather_info.c
>>> +++ b/src/compiler/nir/nir_gather_info.c
>>> @@ -96,7 +96,7 @@ mark_whole_variable(nir_shader *shader, nir_variable
>>> *var)
>>>
>>>     const unsigned slots =
>>>        var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
>>> -                        : glsl_count_attribute_slots(type,
>>> is_vertex_input);
>>> +                        : glsl_count_attribute_slots(type,
>>> is_vertex_input && !shader->options->dvec3_consumes_two_locations);
>>>
>>
>> This makes no sense, why are we passing is_vertex_input &&
>> !dvec3_consumes_two_locations to an argument labled is_vertex_input?
>>
>>
>>>
>>>     set_io_mask(shader, var, 0, slots);
>>>  }
>>> @@ -168,7 +168,7 @@ try_mask_partial_io(nir_shader *shader,
>>> nir_deref_var *deref)
>>>         var->data.mode == nir_var_shader_in)
>>>        is_vertex_input = true;
>>>
>>> -   unsigned offset = get_io_offset(deref, is_vertex_input);
>>> +   unsigned offset = get_io_offset(deref, is_vertex_input &&
>>> !shader->options->dvec3_consumes_two_locations);
>>>
>>
>> Same here
>>
>>
>>>     if (offset == -1)
>>>        return false;
>>>
>>> @@ -184,7 +184,7 @@ try_mask_partial_io(nir_shader *shader,
>>> nir_deref_var *deref)
>>>     }
>>>
>>>     /* double element width for double types that takes two slots */
>>> -   if (!is_vertex_input &&
>>> +   if ((!is_vertex_input || shader->options->dvec3_consumes_two_locations)
>>> &&
>>>         glsl_type_is_dual_slot(glsl_without_array(type))) {
>>>
>>
>> This makes a bit more sense but I'm still not quite seeing it.
>>
>>
>>>        elem_width *= 2;
>>>     }
>>> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
>>> b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
>>> index 2d86a52..5c5c9ad 100644
>>> --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
>>> +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
>>> @@ -50,6 +50,7 @@ static const nir_shader_compiler_options options = {
>>>                 .vertex_id_zero_based = true,
>>>                 .lower_extract_byte = true,
>>>                 .lower_extract_word = true,
>>> +               .dvec3_consumes_two_locations = false,
>>>  };
>>>
>>>  struct nir_shader *
>>> diff --git a/src/intel/vulkan/anv_device.c
>>> b/src/intel/vulkan/anv_device.c
>>> index 2c8ac49..725848f 100644
>>> --- a/src/intel/vulkan/anv_device.c
>>> +++ b/src/intel/vulkan/anv_device.c
>>> @@ -167,7 +167,7 @@ anv_physical_device_init(struct anv_physical_device
>>> *device,
>>>
>>>     brw_process_intel_debug_variable();
>>>
>>> -   device->compiler = brw_compiler_create(NULL, &device->info);
>>> +   device->compiler = brw_compiler_create(NULL, &device->info, true);
>>>     if (device->compiler == NULL) {
>>>        result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
>>>        goto fail;
>>> diff --git a/src/intel/vulkan/genX_pipeline.c
>>> b/src/intel/vulkan/genX_pipeline.c
>>> index cb164ad..97c40b8 100644
>>> --- a/src/intel/vulkan/genX_pipeline.c
>>> +++ b/src/intel/vulkan/genX_pipeline.c
>>> @@ -33,26 +33,33 @@
>>>  static uint32_t
>>>  vertex_element_comp_control(enum isl_format format, unsigned comp)
>>>  {
>>> -   uint8_t bits;
>>>     switch (comp) {
>>> -   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
>>> -   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
>>> -   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
>>> -   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
>>> -   default: unreachable("Invalid component");
>>> -   }
>>> -
>>> -   if (bits) {
>>> -      return VFCOMP_STORE_SRC;
>>> -   } else if (comp < 3) {
>>> -      return VFCOMP_STORE_0;
>>> -   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
>>> -            isl_format_layouts[format].channels.r.type == ISL_SINT) {
>>> -      assert(comp == 3);
>>> -      return VFCOMP_STORE_1_INT;
>>> -   } else {
>>> -      assert(comp == 3);
>>> -      return VFCOMP_STORE_1_FP;
>>> +   case 0:
>>> +      return isl_format_layouts[format].channels.r.bits ?
>>> +         VFCOMP_STORE_SRC : VFCOMP_STORE_0;
>>> +   case 1:
>>> +      return isl_format_layouts[format].channels.g.bits ?
>>> +         VFCOMP_STORE_SRC : VFCOMP_STORE_0;
>>> +   case 2:
>>> +      return isl_format_layouts[format].channels.b.bits ?
>>> +         VFCOMP_STORE_SRC : ((isl_format_layouts[format].channels.r.type
>>> == ISL_RAW) ?
>>> +                             VFCOMP_NOSTORE : VFCOMP_STORE_0);
>>> +   case 3:
>>> +      if (isl_format_layouts[format].channels.a.bits)
>>> +         return VFCOMP_STORE_SRC;
>>> +      else
>>> +         switch (isl_format_layouts[format].channels.r.type) {
>>> +         case ISL_RAW:
>>> +            return isl_format_layouts[format].channels.b.bits ?
>>> +               VFCOMP_STORE_0 : VFCOMP_NOSTORE;
>>> +         case ISL_UINT:
>>> +         case ISL_SINT:
>>> +            return VFCOMP_STORE_1_INT;
>>> +         default:
>>> +            return VFCOMP_STORE_1_FP;
>>> +         }
>>> +   default:
>>> +      unreachable("Invalid component");
>>>     }
>>>  }
>>>
>>> @@ -64,8 +71,10 @@ emit_vertex_input(struct anv_pipeline *pipeline,
>>>
>>>     /* Pull inputs_read out of the VS prog data */
>>>     const uint64_t inputs_read = vs_prog_data->inputs_read;
>>> +   const uint64_t double_inputs_read = vs_prog_data->double_inputs_re
>>> ad;
>>>     assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
>>>     const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
>>> +   const uint32_t elements_double = double_inputs_read >>
>>> VERT_ATTRIB_GENERIC0;
>>>
>>>  #if GEN_GEN >= 8
>>>     /* On BDW+, we only need to allocate space for base ids.  Setting up
>>> @@ -83,13 +92,16 @@ emit_vertex_input(struct anv_pipeline *pipeline,
>>>                                  vs_prog_data->uses_baseinstance;
>>>  #endif
>>>
>>> -   uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem;
>>> -   if (elem_count == 0)
>>> +   uint32_t elem_count =
>>> +      __builtin_popcount(elements) - DIV_ROUND_UP(__builtin_popcount(elements_double),
>>> 2);
>>> +
>>> +   uint32_t total_elems = elem_count + needs_svgs_elem;
>>> +   if (total_elems == 0)
>>>        return;
>>>
>>>     uint32_t *p;
>>>
>>> -   const uint32_t num_dwords = 1 + elem_count * 2;
>>> +   const uint32_t num_dwords = 1 + total_elems * 2;
>>>     p = anv_batch_emitn(&pipeline->batch, num_dwords,
>>>                         GENX(3DSTATE_VERTEX_ELEMENTS));
>>>     memset(p + 1, 0, (num_dwords - 1) * 4);
>>> @@ -107,7 +119,9 @@ emit_vertex_input(struct anv_pipeline *pipeline,
>>>        if ((elements & (1 << desc->location)) == 0)
>>>           continue; /* Binding unused */
>>>
>>> -      uint32_t slot = __builtin_popcount(elements & ((1 <<
>>> desc->location) - 1));
>>> +      uint32_t slot =
>>> +         __builtin_popcount(elements & ((1 << desc->location) - 1)) -
>>> +         DIV_ROUND_UP(__builtin_popcount(elements_double & ((1 <<
>>> desc->location) - 1)), 2);
>>>
>>>        struct GENX(VERTEX_ELEMENT_STATE) element = {
>>>           .VertexBufferIndex = desc->binding,
>>> @@ -137,7 +151,7 @@ emit_vertex_input(struct anv_pipeline *pipeline,
>>>  #endif
>>>     }
>>>
>>> -   const uint32_t id_slot = __builtin_popcount(elements);
>>> +   const uint32_t id_slot = elem_count;
>>>     if (needs_svgs_elem) {
>>>        /* From the Broadwell PRM for the 3D_Vertex_Component_Control
>>> enum:
>>>         *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
>>> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c
>>> b/src/mesa/drivers/dri/i965/brw_compiler.c
>>> index 1aa72bc..b9eceeb 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_compiler.c
>>> +++ b/src/mesa/drivers/dri/i965/brw_compiler.c
>>> @@ -55,6 +55,22 @@ static const struct nir_shader_compiler_options
>>> scalar_nir_options = {
>>>     .lower_unpack_snorm_4x8 = true,
>>>     .lower_unpack_unorm_2x16 = true,
>>>     .lower_unpack_unorm_4x8 = true,
>>> +   .dvec3_consumes_two_locations = false,
>>> +};
>>> +
>>> +static const struct nir_shader_compiler_options
>>> vulkan_scalar_nir_options = {
>>> +   COMMON_OPTIONS,
>>> +   .lower_pack_half_2x16 = true,
>>> +   .lower_pack_snorm_2x16 = true,
>>> +   .lower_pack_snorm_4x8 = true,
>>> +   .lower_pack_unorm_2x16 = true,
>>> +   .lower_pack_unorm_4x8 = true,
>>> +   .lower_unpack_half_2x16 = true,
>>> +   .lower_unpack_snorm_2x16 = true,
>>> +   .lower_unpack_snorm_4x8 = true,
>>> +   .lower_unpack_unorm_2x16 = true,
>>> +   .lower_unpack_unorm_4x8 = true,
>>> +   .dvec3_consumes_two_locations = true,
>>>  };
>>>
>>>  static const struct nir_shader_compiler_options vector_nir_options = {
>>> @@ -75,6 +91,7 @@ static const struct nir_shader_compiler_options
>>> vector_nir_options = {
>>>     .lower_unpack_unorm_2x16 = true,
>>>     .lower_extract_byte = true,
>>>     .lower_extract_word = true,
>>> +   .dvec3_consumes_two_locations = false,
>>>  };
>>>
>>>  static const struct nir_shader_compiler_options vector_nir_options_gen6
>>> = {
>>> @@ -92,10 +109,11 @@ static const struct nir_shader_compiler_options
>>> vector_nir_options_gen6 = {
>>>     .lower_unpack_unorm_2x16 = true,
>>>     .lower_extract_byte = true,
>>>     .lower_extract_word = true,
>>> +   .dvec3_consumes_two_locations = false,
>>>  };
>>>
>>>  struct brw_compiler *
>>> -brw_compiler_create(void *mem_ctx, const struct gen_device_info
>>> *devinfo)
>>> +brw_compiler_create(void *mem_ctx, const struct gen_device_info
>>> *devinfo, bool is_vulkan)
>>>  {
>>>     struct brw_compiler *compiler = rzalloc(mem_ctx, struct
>>> brw_compiler);
>>>
>>> @@ -138,7 +156,8 @@ brw_compiler_create(void *mem_ctx, const struct
>>> gen_device_info *devinfo)
>>>           compiler->glsl_compiler_options[i].EmitNoIndirectSampler =
>>> true;
>>>
>>>        if (is_scalar) {
>>> -         compiler->glsl_compiler_options[i].NirOptions =
>>> &scalar_nir_options;
>>> +         compiler->glsl_compiler_options[i].NirOptions = is_vulkan ?
>>> &vulkan_scalar_nir_options
>>> +                                                                   :
>>> &scalar_nir_options;
>>>        } else {
>>>           compiler->glsl_compiler_options[i].NirOptions =
>>>              devinfo->gen < 6 ? &vector_nir_options :
>>> &vector_nir_options_gen6;
>>> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h
>>> b/src/mesa/drivers/dri/i965/brw_compiler.h
>>> index 65a7478..34df343 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
>>> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
>>> @@ -760,7 +760,7 @@ DEFINE_PROG_DATA_DOWNCAST(sf)
>>>  /** @} */
>>>
>>>  struct brw_compiler *
>>> -brw_compiler_create(void *mem_ctx, const struct gen_device_info
>>> *devinfo);
>>> +brw_compiler_create(void *mem_ctx, const struct gen_device_info
>>> *devinfo, bool is_vulkan);
>>>
>>>  /**
>>>   * Compile a vertex shader.
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> index 14415bd..00c5bb4 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> @@ -35,10 +35,16 @@ using namespace brw;
>>>  fs_reg *
>>>  fs_visitor::emit_vs_system_value(int location)
>>>  {
>>> -   fs_reg *reg = new(this->mem_ctx)
>>> -      fs_reg(ATTR, 4 * (_mesa_bitcount_64(nir->info->inputs_read) +
>>> -                        _mesa_bitcount_64(nir->info->d
>>> ouble_inputs_read)),
>>> -             BRW_REGISTER_TYPE_D);
>>> +   fs_reg *reg;
>>> +   if (nir->options->dvec3_consumes_two_locations)
>>> +      reg = new(this->mem_ctx)
>>> +         fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info->inputs_read),
>>> +                BRW_REGISTER_TYPE_D);
>>> +   else
>>> +      reg = new(this->mem_ctx)
>>> +         fs_reg(ATTR, 4 * (_mesa_bitcount_64(nir->info->inputs_read) +
>>> +                           _mesa_bitcount_64(nir->info->
>>> double_inputs_read)),
>>> +                BRW_REGISTER_TYPE_D);
>>>     struct brw_vs_prog_data *vs_prog_data = brw_vs_prog_data(prog_data);
>>>
>>>     switch (location) {
>>> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
>>> b/src/mesa/drivers/dri/i965/brw_nir.c
>>> index 763e3ec..daa22c6 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_nir.c
>>> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
>>> @@ -96,7 +96,7 @@ add_const_offset_to_base(nir_shader *nir,
>>> nir_variable_mode mode)
>>>  }
>>>
>>>  static bool
>>> -remap_vs_attrs(nir_block *block, shader_info *nir_info)
>>> +remap_vs_attrs(nir_block *block, nir_shader *nir)
>>>  {
>>>     nir_foreach_instr(instr, block) {
>>>        if (instr->type != nir_instr_type_intrinsic)
>>> @@ -111,10 +111,13 @@ remap_vs_attrs(nir_block *block, shader_info
>>> *nir_info)
>>>            * before it and counting the bits.
>>>            */
>>>           int attr = intrin->const_index[0];
>>> -         int slot = _mesa_bitcount_64(nir_info->inputs_read &
>>> +         int slot = _mesa_bitcount_64(nir->info->inputs_read &
>>> +                                      BITFIELD64_MASK(attr));
>>> +
>>> +         int dslot = 0;
>>> +         if (!nir->options->dvec3_consumes_two_locations)
>>> +            dslot = _mesa_bitcount_64(nir->info->double_inputs_read &
>>>                                        BITFIELD64_MASK(attr));
>>> -         int dslot = _mesa_bitcount_64(nir_info->double_inputs_read &
>>> -                                       BITFIELD64_MASK(attr));
>>>           intrin->const_index[0] = 4 * (slot + dslot);
>>>        }
>>>     }
>>> @@ -204,7 +207,10 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>>>      * loaded as one vec4 or dvec4 per element (or matrix column),
>>> depending on
>>>      * whether it is a double-precision type or not.
>>>      */
>>> -   nir_lower_io(nir, nir_var_shader_in, type_size_vs_input, 0);
>>> +   nir_lower_io(nir,
>>> +                nir_var_shader_in,
>>> +                nir->options->dvec3_consumes_two_locations ?
>>> type_size_vec4 : type_size_vs_input,
>>> +                0);
>>>
>>>     /* This pass needs actual constants */
>>>     nir_opt_constant_folding(nir);
>>> @@ -220,7 +226,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>>>        nir_foreach_function(function, nir) {
>>>           if (function->impl) {
>>>              nir_foreach_block(block, function->impl) {
>>> -               remap_vs_attrs(block, nir->info);
>>> +               remap_vs_attrs(block, nir);
>>>              }
>>>           }
>>>        }
>>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>>> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>>> index b9e592f..0aab97f 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>>> @@ -2129,6 +2129,9 @@ brw_compile_vs(const struct brw_compiler
>>> *compiler, void *log_data,
>>>
>>>     unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read);
>>>
>>> +   if (shader->options->dvec3_consumes_two_locations)
>>> +      nr_attributes -= DIV_ROUND_UP(_mesa_bitcount_64
>>> (prog_data->double_inputs_read), 2);
>>> +
>>>     /* gl_VertexID and gl_InstanceID are system values, but arrive via an
>>>      * incoming vertex attribute.  So, add an extra slot.
>>>      */
>>> @@ -2146,9 +2149,13 @@ brw_compile_vs(const struct brw_compiler
>>> *compiler, void *log_data,
>>>        nr_attributes++;
>>>     }
>>>
>>> -   unsigned nr_attribute_slots =
>>> -      nr_attributes +
>>> -      _mesa_bitcount_64(shader->info->double_inputs_read);
>>> +   unsigned nr_attribute_slots = nr_attributes;
>>> +   if (shader->options->dvec3_consumes_two_locations)
>>> +      nr_attribute_slots +=
>>> +         DIV_ROUND_UP(_mesa_bitcount_64(shader->info->double_inputs_read),
>>> 2);
>>> +   else
>>> +      nr_attribute_slots +=
>>> +         _mesa_bitcount_64(shader->info->double_inputs_read);
>>>
>>>     /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB
>>> Entry
>>>      * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.
>>> Empirically, in
>>> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
>>> b/src/mesa/drivers/dri/i965/intel_screen.c
>>> index e1c3c19..c050f86 100644
>>> --- a/src/mesa/drivers/dri/i965/intel_screen.c
>>> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
>>> @@ -1688,7 +1688,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen
>>> *dri_screen)
>>>        ? screenExtensions : intelRobustScreenExtensions;
>>>
>>>     screen->compiler = brw_compiler_create(screen,
>>> -                                          &screen->devinfo);
>>> +                                          &screen->devinfo,
>>> +                                          false);
>>>     screen->compiler->shader_debug_log = shader_debug_log_mesa;
>>>     screen->compiler->shader_perf_log = shader_perf_log_mesa;
>>>     screen->program_id = 1;
>>> --
>>> 2.7.4
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161202/0f9a8a15/attachment-0001.html>


More information about the mesa-dev mailing list