[Mesa-dev] [PATCH 5/7] i965: add support for image AoA

Francisco Jerez currojerez at riseup.net
Fri Oct 30 06:24:11 PDT 2015


Timothy Arceri <t_arceri at yahoo.com.au> writes:

> Cc: Francisco Jerez <currojerez at riseup.net>
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp       | 44 ++++++++++++++++----------
>  src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp |  2 ++
>  2 files changed, 30 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 0e044d0..16b5f0a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1037,19 +1037,27 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
>  {
>     fs_reg image(UNIFORM, deref->var->data.driver_location,
>                  BRW_REGISTER_TYPE_UD);
> -
> -   if (deref->deref.child) {
> -      const nir_deref_array *deref_array =
> -         nir_deref_as_array(deref->deref.child);
> -      assert(deref->deref.child->deref_type == nir_deref_type_array &&
> -             deref_array->deref.child == NULL);
> -      const unsigned size = glsl_get_length(deref->var->type);
> +   fs_reg *indirect_offset = NULL;

Assign the indirect indexing temporary directly to image.reladdr so you
avoid this declaration and the 'if (indirect_offset)...' conditional below.

> +
> +   unsigned img_offset = 0;

Do 'image = offset(image, bld, base * element-size)' inside the loop so
you avoid this redundant temporary and the assignment at the bottom.

> +   const nir_deref *tail = &deref->deref;
> +   while (tail->child) {

Looks like a for loop 'for (const nir_deref *tail = deref->deref.child;
                            tail; tail = tail->child)'.

> +      const nir_deref_array *deref_array = nir_deref_as_array(tail->child);
> +      assert(tail->child->deref_type == nir_deref_type_array);
> +      tail = tail->child;
> +      const unsigned size = glsl_get_length(tail->type);
> +      const unsigned child_array_elements = tail->child != NULL ?
> +         glsl_get_aoa_size(tail->type) : 1;
>        const unsigned base = MIN2(deref_array->base_offset, size - 1);
> -
> -      image = offset(image, bld, base * BRW_IMAGE_PARAM_SIZE);
> +      const unsigned aoa_size = child_array_elements * BRW_IMAGE_PARAM_SIZE;

How about "element_size" instead?  And wouldn't it be easier to
initialize it to 'type_size_scalar(tail->type)' so you avoid the
child_array_elements computation and multiplication by
BRW_IMAGE_PARAM_SIZE?

> +      img_offset += base * aoa_size;
>  
>        if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
> -         fs_reg *tmp = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type));
> +         fs_reg tmp = vgrf(glsl_type::int_type);
> +         if (indirect_offset == NULL) {
> +            indirect_offset = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type));
> +            bld.MOV(*indirect_offset, fs_reg(0));
> +         }
>  
>           if (devinfo->gen == 7 && !devinfo->is_haswell) {
>              /* IVB hangs when trying to access an invalid surface index with
> @@ -1060,18 +1068,22 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
>               * of the possible outcomes of the hang.  Clamp the index to
>               * prevent access outside of the array bounds.
>               */
> -            bld.emit_minmax(*tmp, retype(get_nir_src(deref_array->indirect),
> -                                         BRW_REGISTER_TYPE_UD),
> +            bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect),
> +                                        BRW_REGISTER_TYPE_UD),
>                              fs_reg(size - base - 1), BRW_CONDITIONAL_L);
>           } else {
> -            bld.MOV(*tmp, get_nir_src(deref_array->indirect));
> +            bld.MOV(tmp, get_nir_src(deref_array->indirect));
>           }
> -
> -         bld.MUL(*tmp, *tmp, fs_reg(BRW_IMAGE_PARAM_SIZE));
> -         image.reladdr = tmp;
> +         bld.MUL(tmp, tmp, fs_reg(aoa_size));

If you create the reladdr temporary down here you may be able avoid the
useless zero-initialization and addition for the first AoA level, like:

| if (image.reladdr)
|    bld.ADD(*image.reladdr, *image.reladdr, tmp);
| else
|    image.reladdr = new(mem_ctx) fs_reg(tmp);

> +         bld.ADD(*indirect_offset, *indirect_offset, tmp);
>        }
>     }
>  
> +   if (indirect_offset) {
> +      image.reladdr = indirect_offset;
> +   }
> +   image = offset(image, bld, img_offset);
> +
>     return image;
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> index d3326e9..87b3839 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> @@ -98,6 +98,8 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
>        if (storage->type->is_image()) {
>           brw_setup_image_uniform_values(stage, stage_prog_data,
>                                          uniform_index, storage);
> +         uniform_index +=
> +            BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1);
>        } else {
>           gl_constant_value *components = storage->storage;
>           unsigned vector_count = (MAX2(storage->array_elements, 1) *
> -- 
> 2.4.3
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20151030/aeef6abd/attachment.sig>


More information about the mesa-dev mailing list