[Mesa-dev] [RFC] nir: const_index sanity

Jason Ekstrand jason at jlekstrand.net
Wed Jan 13 17:48:02 PST 2016


On Jan 13, 2016 4:03 PM, "Rob Clark" <robdclark at gmail.com> wrote:
>
> From: Rob Clark <robclark at freedesktop.org>
>
> ---
> An idea for how to bring some sanity to the wild-west of intrinsic
> const_index[] usage.  Also w/ nir_print support, which could be
> split into other patch, but makes the nir_print output a bit nicer:
>
>   intrinsic store_output (ssa_210, ssa_66) () (0, 15) /* base=0 wrmask=xyzw */
>
> (and already made me realize that ttn was neglecting to set wrmask on
> store_output's)
>
> Probably I'd add "setter" functions too, and then in follow-on patches,
> update the gazillion places where const_index[] access is open-coded.
>
> But first, before big conflicty changes like that, I figured I'd see what
> others thought.  The other variation of the idea is to simply drop the
> const_index[] field and replace w/ 'unsigned wrmask' and 'int base'.
> Although that would be a bigger more flag-day sort of patch.

We really need to do something here, and what you've done is a pretty clever
way to handle the problem.  I'll have to give it a bit more thought before
I whole-heartedly endorse it, but at first brush it looks pretty good.

A few minor comments below.

> BR,
> -R
>
>  src/glsl/nir/nir.h            |  48 +++++++++++-
>  src/glsl/nir/nir_intrinsics.c |  11 ++-
>  src/glsl/nir/nir_intrinsics.h | 178
+++++++++++++++++++++---------------------
>  src/glsl/nir/nir_print.c      |  30 ++++---
>  4 files changed, 166 insertions(+), 101 deletions(-)
>
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index bedcc0d..2235154 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -786,7 +786,7 @@ typedef struct {
>  } nir_call_instr;
>
>  #define INTRINSIC(name, num_srcs, src_components, has_dest,
dest_components, \
> -                  num_variables, num_indices, flags) \
> +                  num_variables, num_indices, idx0, idx1, idx2, flags) \
>     nir_intrinsic_##name,
>
>  #define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
> @@ -799,6 +799,8 @@ typedef enum {
>  #undef INTRINSIC
>  #undef LAST_INTRINSIC
>
> +#define NIR_INTRINSIC_MAX_CONST_INDEX 3
> +
>  /** Represents an intrinsic
>   *
>   * An intrinsic is an instruction type for handling things that are
> @@ -842,7 +844,7 @@ typedef struct {
>      */
>     uint8_t num_components;
>
> -   int const_index[3];
> +   int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];
>
>     nir_deref_var *variables[2];
>
> @@ -871,6 +873,29 @@ typedef enum {
>     NIR_INTRINSIC_CAN_REORDER = (1 << 1),
>  } nir_intrinsic_semantic_flag;
>
> +/**
> + * \name NIR intrinsics const-index flag
> + *
> + * Indicates the usage of a const_index slot.
> + *
> + * \sa nir_intrinsic_info::index_map
> + */
> +typedef enum {
> +   /**
> +    * Generally instructions that take a offset src argument, can encode
> +    * a constant 'base' value which is added to the offset.
> +    */
> +   NIR_INTRINSIC_BASE = 1,
> +
> +   /**
> +    * For store instructions, a writemask for the store.
> +    */
> +   NIR_INTRINSIC_WRMASK = 2,
> +
> +   NIR_INTRINSIC_NUM_INDEX_FLAGS,
> +
> +} nir_intrinsic_index_flag;
> +
>  #define NIR_INTRINSIC_MAX_INPUTS 4
>
>  typedef struct {
> @@ -900,12 +925,31 @@ typedef struct {
>     /** the number of constant indices used by the intrinsic */
>     unsigned num_indices;
>
> +   /** indicates the usage of intr->const_index[n] */
> +   unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
> +
>     /** semantic flags for calls to this intrinsic */
>     nir_intrinsic_semantic_flag flags;
>  } nir_intrinsic_info;
>
>  extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
>
> +static inline unsigned
> +nir_intrinsic_write_mask(nir_intrinsic_instr *instr)
> +{
> +   const nir_intrinsic_info *info =
&nir_intrinsic_infos[instr->intrinsic];
> +   assert(info->index_map[NIR_INTRINSIC_WRMASK] > 0);
> +   return instr->const_index[info->index_map[NIR_INTRINSIC_WRMASK] - 1];
> +}
> +
> +static inline int
> +nir_intrinsic_base(nir_intrinsic_instr *instr)
> +{
> +   const nir_intrinsic_info *info =
&nir_intrinsic_infos[instr->intrinsic];
> +   assert(info->index_map[NIR_INTRINSIC_BASE] > 0);
> +   return instr->const_index[info->index_map[NIR_INTRINSIC_BASE] - 1];
> +}
> +
>  /**
>   * \group texture information
>   *
> diff --git a/src/glsl/nir/nir_intrinsics.c b/src/glsl/nir/nir_intrinsics.c
> index a7c868c..7dddc70 100644
> --- a/src/glsl/nir/nir_intrinsics.c
> +++ b/src/glsl/nir/nir_intrinsics.c
> @@ -30,7 +30,8 @@
>  #define OPCODE(name) nir_intrinsic_##name
>
>  #define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \
> -                  _dest_components, _num_variables, _num_indices,
_flags) \
> +                  _dest_components, _num_variables, _num_indices, \
> +                  idx0, idx1, idx2, _flags) \
>  { \
>     .name = #_name, \
>     .num_srcs = _num_srcs, \
> @@ -39,9 +40,15 @@
>     .dest_components = _dest_components, \
>     .num_variables = _num_variables, \
>     .num_indices = _num_indices, \
> -   .flags = _flags \
> +   .index_map = { \
> +      [NIR_INTRINSIC_ ## idx0] = 1, \
> +      [NIR_INTRINSIC_ ## idx1] = 2, \
> +      [NIR_INTRINSIC_ ## idx2] = 3, \
> +   }, \
>  },
>
> +#define NIR_INTRINSIC_xx 0
> +
>  #define LAST_INTRINSIC(name)
>
>  const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = {
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
> index 62eead4..fd46692 100644
> --- a/src/glsl/nir/nir_intrinsics.h
> +++ b/src/glsl/nir/nir_intrinsics.h
> @@ -30,7 +30,7 @@
>   * expands to a list of macros of the form:
>   *
>   * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
> - *              num_variables, num_indices, flags)
> + *              num_variables, num_indices, idx0, idx1, idx2, flags)
>   *
>   * Which should correspond one-to-one with the nir_intrinsic_info
structure. It
>   * is included in both ir.h to create the nir_intrinsic enum (with
members of
> @@ -42,9 +42,9 @@
>  #define ARR(...) { __VA_ARGS__ }
>
>
> -INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
> -INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
> -INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
> +INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx,
NIR_INTRINSIC_CAN_ELIMINATE)
> +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
> +INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
>
>  /*
>   * Interpolation of input.  The interp_var_at* intrinsics are similar to
the
> @@ -54,25 +54,25 @@ INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
>   * respectively.
>   */
>
> -INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
> +INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, xx, xx, xx,
>            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
> -INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
> +INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, xx, xx, xx,
>            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
> -INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
> +INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, xx, xx, xx,
>            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
>
>  /*
>   * Ask the driver for the size of a given buffer. It takes the buffer
index
>   * as source.
>   */
> -INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
> +INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,
>            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
>
>  /*
>   * a barrier is an intrinsic with no inputs/outputs but which can't be
moved
>   * around/optimized in general
>   */
> -#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
> +#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx,
xx, 0)
>
>  BARRIER(barrier)
>  BARRIER(discard)
> @@ -89,7 +89,7 @@ BARRIER(memory_barrier)
>   * The latter can be used as code motion barrier, which is currently not
>   * feasible with NIR.
>   */
> -INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
> +INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx,
NIR_INTRINSIC_CAN_ELIMINATE)
>
>  /*
>   * Memory barrier with semantics analogous to the compute shader
> @@ -103,7 +103,7 @@ BARRIER(memory_barrier_image)
>  BARRIER(memory_barrier_shared)
>
>  /** A conditional discard, with a single boolean source. */
> -INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
> +INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
>
>  /**
>   * Basic Geometry Shader intrinsics.
> @@ -113,8 +113,9 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
>   *
>   * end_primitive implements GLSL's EndPrimitive() built-in.
>   */
> -INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
> -INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
> +// ???

The const index here is the TF stream ID.

> +INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, WRMASK, xx, xx, 0)
> +INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, WRMASK, xx, xx, 0)
>
>  /**
>   * Geometry Shader intrinsics with a vertex count.
> @@ -125,9 +126,9 @@ INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
>   * These maintain a count of the number of vertices emitted, as an
additional
>   * unsigned integer source.
>   */
> -INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
> -INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
> -INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
> +INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, WRMASK,
xx, xx, 0)
> +INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, WRMASK,
xx, xx, 0)
> +INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
>
>  /*
>   * Atomic counters
> @@ -137,8 +138,8 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0,
0, 0)
>   */
>
>  #define ATOMIC(name, flags) \
> -   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0,
flags) \
> -   INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
> +   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx,
xx, xx, flags) \
> +   INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx,
xx, flags)
>
>  ATOMIC(inc, 0)
>  ATOMIC(dec, 0)
> @@ -159,20 +160,20 @@ ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
>   * either one or two additional scalar arguments with the same meaning
as in
>   * the ARB_shader_image_load_store specification.
>   */
> -INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0,
> +INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, xx, xx, xx,
>            NIR_INTRINSIC_CAN_ELIMINATE)
> -INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0)
> -INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
> -INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
> -INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
> -INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
> -INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
> -INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
> -INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
> -INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
> -INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
> +INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, xx, xx, xx, 0)
> +INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx,
0)
> +INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx,
0)
> +INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx,
0)
> +INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx,
0)
> +INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
> +INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx,
0)
> +INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx,
xx, 0)
> +INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx,
xx, xx, 0)
> +INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
>            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
> -INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
> +INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
>            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
>
>  /*
> @@ -192,16 +193,16 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
>   *    in ssbo_atomic_add, etc).
>   * 3: For CompSwap only: the second data parameter.
>   */
> -INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
> +INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx,
0)
> +INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx,
0)
> +INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx,
0)
> +INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx,
0)
> +INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx,
xx, 0)
> +INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx,
xx, xx, 0)
>
>  /*
>   * CS shared variable atomic intrinsics
> @@ -219,42 +220,43 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1,
1), true, 1, 0, 0, 0)
>   *    in shared_atomic_add, etc).
>   * 2: For CompSwap only: the second data parameter.
>   */
> -INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)
> -INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
> +INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
> +INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx,
xx, 0)
> +INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx,
xx, xx, 0)
>
> -#define SYSTEM_VALUE(name, components, num_indices) \
> +#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
>     INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
> +   idx0, idx1, idx2, \
>     NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
>
> -SYSTEM_VALUE(front_face, 1, 0)
> -SYSTEM_VALUE(vertex_id, 1, 0)
> -SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
> -SYSTEM_VALUE(base_vertex, 1, 0)
> -SYSTEM_VALUE(instance_id, 1, 0)
> -SYSTEM_VALUE(base_instance, 1, 0)
> -SYSTEM_VALUE(draw_id, 1, 0)
> -SYSTEM_VALUE(sample_id, 1, 0)
> -SYSTEM_VALUE(sample_pos, 2, 0)
> -SYSTEM_VALUE(sample_mask_in, 1, 0)
> -SYSTEM_VALUE(primitive_id, 1, 0)
> -SYSTEM_VALUE(invocation_id, 1, 0)
> -SYSTEM_VALUE(tess_coord, 3, 0)
> -SYSTEM_VALUE(tess_level_outer, 4, 0)
> -SYSTEM_VALUE(tess_level_inner, 2, 0)
> -SYSTEM_VALUE(patch_vertices_in, 1, 0)
> -SYSTEM_VALUE(local_invocation_id, 3, 0)
> -SYSTEM_VALUE(work_group_id, 3, 0)
> -SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is
user_clip_plane[idx] */
> -SYSTEM_VALUE(num_work_groups, 3, 0)
> -SYSTEM_VALUE(helper_invocation, 1, 0)
> +SYSTEM_VALUE(front_face, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(vertex_id, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(vertex_id_zero_base, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(base_vertex, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(instance_id, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(base_instance, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(draw_id, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(sample_id, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(sample_pos, 2, 0, xx, xx, xx)
> +SYSTEM_VALUE(sample_mask_in, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(primitive_id, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(invocation_id, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(tess_coord, 3, 0, xx, xx, xx)
> +SYSTEM_VALUE(tess_level_outer, 4, 0, xx, xx, xx)
> +SYSTEM_VALUE(tess_level_inner, 2, 0, xx, xx, xx)
> +SYSTEM_VALUE(patch_vertices_in, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(local_invocation_id, 3, 0, xx, xx, xx)
> +SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
> +SYSTEM_VALUE(user_clip_plane, 4, 1, BASE, xx, xx) /* const_index[0] is
user_clip_plane[idx] */

I'd rather give this its own name.

> +SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
> +SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
>
>  /*
>   * Load operations pull data from some piece of GPU memory.  All load
> @@ -274,25 +276,25 @@ SYSTEM_VALUE(helper_invocation, 1, 0)
>   * offsets are always in bytes.
>   */
>
> -#define LOAD(name, srcs, indices, flags) \
> -   INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices,
flags)
> +#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
> +   INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0,
num_indices, idx0, idx1, idx2, flags)
>
>  /* src[] = { offset }. const_index[] = { base } */
> -LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE |
NIR_INTRINSIC_CAN_REORDER)
> +LOAD(uniform, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE |
NIR_INTRINSIC_CAN_REORDER)
>  /* src[] = { buffer_index, offset }. No const_index */
> -LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
> +LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE |
NIR_INTRINSIC_CAN_REORDER)
>  /* src[] = { offset }. const_index[] = { base } */
> -LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE |
NIR_INTRINSIC_CAN_REORDER)
> +LOAD(input, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE |
NIR_INTRINSIC_CAN_REORDER)
>  /* src[] = { vertex, offset }. const_index[] = { base } */
> -LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE |
NIR_INTRINSIC_CAN_REORDER)
> +LOAD(per_vertex_input, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE |
NIR_INTRINSIC_CAN_REORDER)
>  /* src[] = { buffer_index, offset }. No const_index */
> -LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
> +LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
>  /* src[] = { offset }. const_index[] = { base } */
> -LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
> +LOAD(output, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
>  /* src[] = { vertex, offset }. const_index[] = { base } */
> -LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
> +LOAD(per_vertex_output, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
>  /* src[] = { offset }. const_index[] = { base } */
> -LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
> +LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
>
>  /*
>   * Stores work the same way as loads, except now the first source is the
value
> @@ -301,16 +303,16 @@ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
>   * const_index[0].
>   */
>
> -#define STORE(name, srcs, indices, flags) \
> -   INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices,
flags)
> +#define STORE(name, srcs, num_indices, idx0, idx1, idx2, flags) \
> +   INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0,
num_indices, idx0, idx1, idx2, flags)
>
>  /* src[] = { value, offset }. const_index[] = { base, write_mask } */
> -STORE(output, 2, 2, 0)
> +STORE(output, 2, 2, BASE, WRMASK, xx, 0)
>  /* src[] = { value, vertex, offset }. const_index[] = { base, write_mask
} */
> -STORE(per_vertex_output, 3, 2, 0)
> +STORE(per_vertex_output, 3, 2, BASE, WRMASK, xx, 0)
>  /* src[] = { value, block_index, offset }. const_index[] = { write_mask
} */
> -STORE(ssbo, 3, 1, 0)
> +STORE(ssbo, 3, 1, WRMASK, xx, xx, 0)
>  /* src[] = { value, offset }. const_index[] = { base, write_mask } */
> -STORE(shared, 2, 2, 0)
> +STORE(shared, 2, 2, BASE, WRMASK, xx, 0)
>
>  LAST_INTRINSIC(store_shared)
> diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
> index 80638ed..96caf0a 100644
> --- a/src/glsl/nir/nir_print.c
> +++ b/src/glsl/nir/nir_print.c
> @@ -444,15 +444,16 @@ print_deref(nir_deref_var *deref, print_state
*state)
>  static void
>  print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
>  {
> -   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
> +   const nir_intrinsic_info *info =
&nir_intrinsic_infos[instr->intrinsic];
> +   unsigned num_srcs = info->num_srcs;
>     FILE *fp = state->fp;
>
> -   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
> +   if (info->has_dest) {
>        print_dest(&instr->dest, state);
>        fprintf(fp, " = ");
>     }
>
> -   fprintf(fp, "intrinsic %s (",
nir_intrinsic_infos[instr->intrinsic].name);
> +   fprintf(fp, "intrinsic %s (", info->name);
>
>     for (unsigned i = 0; i < num_srcs; i++) {
>        if (i != 0)
> @@ -463,9 +464,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr,
print_state *state)
>
>     fprintf(fp, ") (");
>
> -   unsigned num_vars =
nir_intrinsic_infos[instr->intrinsic].num_variables;
> -
> -   for (unsigned i = 0; i < num_vars; i++) {
> +   for (unsigned i = 0; i < info->num_variables; i++) {
>        if (i != 0)
>           fprintf(fp, ", ");
>
> @@ -474,9 +473,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr,
print_state *state)
>
>     fprintf(fp, ") (");
>
> -   unsigned num_indices =
nir_intrinsic_infos[instr->intrinsic].num_indices;
> -
> -   for (unsigned i = 0; i < num_indices; i++) {
> +   for (unsigned i = 0; i < info->num_indices; i++) {
>        if (i != 0)
>           fprintf(fp, ", ");
>
> @@ -485,6 +482,21 @@ print_intrinsic_instr(nir_intrinsic_instr *instr,
print_state *state)
>
>     fprintf(fp, ")");
>
> +   if (info->index_map[NIR_INTRINSIC_BASE] ||
> +       info->index_map[NIR_INTRINSIC_WRMASK]) {
> +      fprintf(fp, " /*");
> +      if (info->index_map[NIR_INTRINSIC_BASE])
> +         fprintf(fp, " base=%d", nir_intrinsic_base(instr));
> +      if (info->index_map[NIR_INTRINSIC_WRMASK] & NIR_INTRINSIC_WRMASK) {
> +          unsigned wrmask = nir_intrinsic_write_mask(instr);
> +          fprintf(fp, " wrmask=");
> +          for (unsigned i = 0; i < 4; i++)
> +             if ((wrmask >> i) & 1)
> +                fprintf(fp, "%c", "xyzw"[i]);
> +      }
> +      fprintf(fp, " */");
> +   }
> +
>     if (!state->shader)
>        return;
>
> --
> 2.5.0
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20160113/f9ab632b/attachment-0001.html>


More information about the mesa-dev mailing list