[Mesa-dev] [PATCH 05/25] i965: Define and initialize image meta-data structure.

Paul Berry stereotype441 at gmail.com
Mon Dec 30 17:51:57 PST 2013


On 2 December 2013 11:39, Francisco Jerez <currojerez at riseup.net> wrote:

> This will be used to pass image information to the shader when we
> cannot use typed surface reads and writes.  All entries except
> surface_idx and size are otherwise unused and will get eliminated by
> the uniform packing pass.  size will be used for bounds checking with
> some image formats and will be useful for ARB_shader_image_size too.
> surface_idx is always used.
> ---
>  src/mesa/drivers/dri/i965/brw_context.h           | 42 +++++++++++++
>  src/mesa/drivers/dri/i965/brw_program.c           |  5 ++
>  src/mesa/drivers/dri/i965/brw_vec4_gs.c           |  4 ++
>  src/mesa/drivers/dri/i965/brw_vs.c                |  7 ++-
>  src/mesa/drivers/dri/i965/brw_wm.c                |  7 ++-
>  src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 76
> +++++++++++++++++++++++
>  6 files changed, 138 insertions(+), 3 deletions(-)
>

There's something going on in this patch that seems really dodgy to me.  I
haven't yet figured out whether it will lead to user-visible bugs, but it
breaks a key assumption that we make elsewhere in the driver.  That
assumption is: the data contained in brw_stage_prog_data (and transitively
contained in other data structures owned by brw_stage_prog_data) is
initialized at the time the shader is compiled, and is thereafter
constant.  The only circumstance in which it's ok for brw_stage_prog_data
to point to data that gets modified later is if that data is owned by
another data structure (this is what we do for uniforms, and it's the
reason why the types of brw_stage_prog_data::param and
brw_stage_prog_data::pull_param are "const float **".  What's going on is
that brw_stage_prog_data points to an array of const float *'s, which it
owns, and then those pointers point to the current values of the uniforms,
which are owned by core mesa.

Your patch introduces data that is owned by brw_stage_prog_data (in the
sense that it gets deleted when brw_stage_prog_data gets deleted), but
which is not initialized at shader compilation time; instead it is
initialized at the time of brw_upload_image_surfaces().  Additionally,
pointers in brw_stage_prog_data::param and brw_stage_prog_data::pull_param
point to these values.  That leads to an unexpected asymmetry--push and
pull parameters that are image parameters are owned by brw_stage_prog_data,
and pointed to in two separate ways, whereas all other push and pull
parameters are owned by other data structures.

Another problem is that brw_stage_prog_data_compare() compares the contents
of the image data.  However, brw_stage_prog_data_compare() is called just
after compilation, to see if the program that has just been compiled
matches a program that was previously compiled; if so we can re-use the
previous program.  (This allows us to avoid inefficiency in the case where
the we thought we had to do a state-dependent recompile, but once the
recompile completes we have the same program we had before).  Since the
data isn't initialized until brw_upload_image_surfaces(), your patch will
cause brw_stage-prog_data_compare() to compare initialized data with
uninitialized data, defeating this optimization.

What I think we should do instead is have a fixed array of BRW_MAX_IMAGES
brw_image_param objects in brw_stage_state.  brw_upload_image_surfaces()
already has a pointer to the brw_stage_state, so it can just refer to this
directly when calling brw->vtbl.update_image_surface().  And
backend_visitor can easily be modified to have a pointer to the
brw_stage_state as well, so backend_visitor::setup_image_uniform_values()
can use that to find the brw_image_param objects.  That way we can remove
the brw_image_param pointer from brw_stage_prog_data, and then image params
will work exactly the same way that all other pull/push constants work.


>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 0816912..dc606c0f 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -352,6 +352,7 @@ struct brw_stage_prog_data {
>
>     GLuint nr_params;       /**< number of float params/constants */
>     GLuint nr_pull_params;
> +   GLuint nr_image_params;
>
>     /* Pointers to tracked values (only valid once
>      * _mesa_load_state_parameters has been called at runtime).
> @@ -361,6 +362,47 @@ struct brw_stage_prog_data {
>      */
>     const float **param;
>     const float **pull_param;
> +   struct brw_image_param *image_param;
> +};
> +
> +/*
> + * Image meta-data structure as laid out in the shader parameter
> + * buffer.  Entries have to be 16B-aligned for the vec4 back-end to be
> + * able to use them.  That's okay because the padding and any unused
> + * entries [most of them except when we're doing untyped surface
> + * access] will be removed by the uniform packing pass.
> + */
> +#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET      0
> +#define BRW_IMAGE_PARAM_OFFSET_OFFSET           4
> +#define BRW_IMAGE_PARAM_SIZE_OFFSET             8
> +#define BRW_IMAGE_PARAM_STRIDE_OFFSET           12
> +#define BRW_IMAGE_PARAM_TILING_OFFSET           16
> +#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET        20
> +#define BRW_IMAGE_PARAM_SIZE                    24
> +
> +struct brw_image_param {
> +   /** Surface binding table index. */
> +   uint32_t surface_idx;
> +
> +   /** Surface X, Y and Z dimensions. */
> +   uint32_t size[3];
> +
> +   /** Offset applied to the X and Y surface coordinates. */
> +   uint32_t offset[2];
> +
> +   /** X-stride in bytes, Y-stride in bytes, horizontal Z-stride in
> +    * pixels, vertical Z-stride in pixels.
> +    */
>

The terms "X-stride", "Y-stride", "horizontal Z-stride", and "vertical
Z-stride" need more explanation.

Also, for multi-line Doxygen-style comments the Mesa convention is to do:

/**
 * text text
 * text text
 */


> +   uint32_t stride[4];
>

On first reading, it was not obvious to me why these values are grouped
into a single array rather than split out into 4 separate fields.  Can we
add a comment to clarify?  Perhaps something like "these values are grouped
into an array so that the shader can access them as the components of a
single vec4."


> +
> +   /** Log2 of the tiling modulus in the X, Y and Z dimension. */
> +   uint32_t tiling[3];
>

The term "tiling modulus" needs more explanation.


> +
> +   /** Right shift to apply for surface address swizzling.  Two
> +    * different swizzles can be specified and will be applied one
> +    * after the other.  Use \c 0xff if any of the swizzles is not
> +    * required. */
>

Let's add a sentence to this comment explaining that these swizzles will
only be applied if brw->has_swizzling is true.


> +   uint32_t swizzling[2];
>  };
>
>  /* Data about a particular attempt to compile a program.  Note that
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c
> b/src/mesa/drivers/dri/i965/brw_program.c
> index 908782b..094deeb 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -552,6 +552,10 @@ brw_stage_prog_data_compare(const void *in_a, const
> void *in_b)
>     if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params *
> sizeof(void *)))
>        return false;
>
> +   if (memcmp(a->image_param, b->image_param,
> +              a->nr_image_params * sizeof(struct brw_image_param)))
> +      return false;
> +
>     return true;
>  }
>
> @@ -562,4 +566,5 @@ brw_stage_prog_data_free(const void *p)
>
>     ralloc_free(prog_data->param);
>     ralloc_free(prog_data->pull_param);
> +   ralloc_free(prog_data->image_param);
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> index 8dbd1e8..5583bfd 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> @@ -61,11 +61,15 @@ do_gs_prog(struct brw_context *brw,
>
>     /* We also upload clip plane data as uniforms */
>     param_count += MAX_CLIP_PLANES * 4;
> +   param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
>
>     c.prog_data.base.base.param =
>        rzalloc_array(NULL, const float *, param_count);
>     c.prog_data.base.base.pull_param =
>        rzalloc_array(NULL, const float *, param_count);
> +   c.prog_data.base.base.image_param =
> +      rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
> +   c.prog_data.base.base.nr_image_params = gs->NumImages;
>
>     if (gp->program.OutputType == GL_POINTS) {
>        /* When the output type is points, the geometry shader may output
> data
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c
> b/src/mesa/drivers/dri/i965/brw_vs.c
> index e9f92d4..8118b7e 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -231,8 +231,9 @@ do_vs_prog(struct brw_context *brw,
>         * case being a float value that gets blown up to a vec4, so be
>         * conservative here.
>         */
> -      param_count = vs->num_uniform_components * 4;
> -
> +      param_count = (vs->num_uniform_components * 4  +
> +                     vs->NumImages * BRW_IMAGE_PARAM_SIZE);
> +      stage_prog_data->nr_image_params = vs->NumImages;
>     } else {
>        param_count = vp->program.Base.Parameters->NumParameters * 4;
>     }
> @@ -243,6 +244,8 @@ do_vs_prog(struct brw_context *brw,
>
>     stage_prog_data->param = rzalloc_array(NULL, const float *,
> param_count);
>     stage_prog_data->pull_param = rzalloc_array(NULL, const float *,
> param_count);
> +   stage_prog_data->image_param = rzalloc_array(NULL, struct
> brw_image_param,
> +
>  stage_prog_data->nr_image_params);
>
>     GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
>     prog_data.inputs_read = vp->program.Base.InputsRead;
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index d78d8e3..790c98b 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -148,7 +148,9 @@ bool do_wm_prog(struct brw_context *brw,
>      */
>     int param_count;
>     if (fs) {
> -      param_count = fs->num_uniform_components;
> +      param_count = (fs->num_uniform_components +
> +                     fs->NumImages * BRW_IMAGE_PARAM_SIZE);
> +      c->prog_data.base.nr_image_params = fs->NumImages;
>     } else {
>        param_count = fp->program.Base.Parameters->NumParameters * 4;
>     }
> @@ -157,6 +159,9 @@ bool do_wm_prog(struct brw_context *brw,
>     c->prog_data.base.param = rzalloc_array(NULL, const float *,
> param_count);
>     c->prog_data.base.pull_param =
>        rzalloc_array(NULL, const float *, param_count);
> +   c->prog_data.base.image_param =
> +      rzalloc_array(NULL, struct brw_image_param,
> +                    c->prog_data.base.nr_image_params);
>     c->prog_data.base.nr_params = param_count;
>
>     memcpy(&c->key, key, sizeof(*key));
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> index 532a18c..38df1be 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> @@ -678,6 +678,78 @@ get_image_format(struct brw_context *brw, gl_format
> format)
>  }
>
>  static void
> +update_buffer_image_param(struct brw_context *brw,
> +                          struct gl_image_unit *u,
> +                          struct brw_image_param *param)
> +{
> +   struct gl_buffer_object *obj = u->TexObj->BufferObject;
> +
> +   param->size[0] = (obj->Size /
> _mesa_get_format_bytes(u->_ActualFormat));
> +   param->size[1] = 0;
> +   param->size[2] = 0;
> +   param->offset[0] = 0;
> +   param->offset[1] = 0;
> +   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
> +   param->stride[1] = 0;
> +   param->stride[2] = 0;
> +   param->stride[3] = 0;
> +   param->tiling[0] = 0;
> +   param->tiling[1] = 0;
> +   param->tiling[2] = 0;
> +   param->swizzling[0] = 0xff;
> +   param->swizzling[1] = 0xff;
> +}
> +
> +static void
> +update_texture_image_param(struct brw_context *brw,
> +                           struct gl_image_unit *u,
> +                           struct brw_image_param *param)
> +{
> +   struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
> +
> +   param->size[0] = minify(mt->logical_width0, u->Level);
> +   param->size[1] = minify(mt->logical_height0, u->Level);
> +
> +   if (u->Layered)
> +      param->size[2] = minify(mt->logical_depth0, u->Level);
>

Minification should only occur if this is a 3D texture.


> +   else
> +      param->size[2] = 1;
> +
> +   intel_miptree_get_image_offset(mt, u->Level, u->Layer,
> +                                  &param->offset[0],
> +                                  &param->offset[1]);
> +
> +   param->stride[0] = mt->region->cpp;
> +   param->stride[1] = mt->region->pitch;
> +   param->stride[2] =
> +      brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
> +   param->stride[3] =
> +      brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
> +
> +   if (mt->region->tiling == I915_TILING_X) {
> +      param->tiling[0] = (ffs(512 / mt->region->cpp) - 1);
> +      param->tiling[1] = 3;
> +      param->swizzling[0] = 3;
> +      param->swizzling[1] = 4;
> +   } else if (mt->region->tiling == I915_TILING_Y) {
> +      param->tiling[0] = (ffs(16 / mt->region->cpp) - 1);
> +      param->tiling[1] = 5;
> +      param->swizzling[0] = 3;
> +      param->swizzling[1] = 0xff;
> +   } else {
> +      param->tiling[0] = 0;
> +      param->tiling[1] = 0;
> +      param->swizzling[0] = 0xff;
> +      param->swizzling[1] = 0xff;
> +   }
> +
> +   if (u->TexObj->Target == GL_TEXTURE_3D)
> +      param->tiling[2] = u->Level;
> +   else
> +      param->tiling[2] = 0;
> +}
> +
> +static void
>  gen7_update_image_surface(struct brw_context *brw,
>                            struct gl_image_unit *u,
>                            GLenum access,
> @@ -709,6 +781,8 @@ gen7_update_image_surface(struct brw_context *brw,
>                                       0 /* mocs */,
>                                       access != GL_READ_ONLY);
>
> +      update_buffer_image_param(brw, u, param);
> +
>     } else {
>        struct intel_texture_object *intel_obj = intel_texture_object(obj);
>        struct intel_mipmap_tree *mt = intel_obj->mt;
> @@ -735,6 +809,8 @@ gen7_update_image_surface(struct brw_context *brw,
>                                           access != GL_READ_ONLY,
>                                           false);
>        }
> +
> +      update_texture_image_param(brw, u, param);
>     }
>  }
>
> --
> 1.8.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131230/d56c83cc/attachment-0001.html>


More information about the mesa-dev mailing list