<p dir="ltr"><br>
On Jan 13, 2016 4:03 PM, "Rob Clark" <<a href="mailto:robdclark@gmail.com">robdclark@gmail.com</a>> wrote:<br>
><br>
> From: Rob Clark <<a href="mailto:robclark@freedesktop.org">robclark@freedesktop.org</a>><br>
><br>
> ---<br>
> An idea for how to bring some sanity to the wild-west of intrinsic<br>
> const_index[] usage. Also w/ nir_print support, which could be<br>
> split into other patch, but makes the nir_print output a bit nicer:<br>
><br>
> intrinsic store_output (ssa_210, ssa_66) () (0, 15) /* base=0 wrmask=xyzw */<br>
><br>
> (and already made me realize that ttn was neglecting to set wrmask on<br>
> store_output's)<br>
><br>
> Probably I'd add "setter" functions to, and then in follow-on patches,<br>
> update the gazillion places where const_index[] access is open-coded.<br>
><br>
> But first, before big conflicty changes like that, I figured I see what<br>
> others thought. The other variation of the idea is to simply drop the<br>
> const_index[] field and replace w/ 'unsigned wrmask' and 'int base'.<br>
> Although that would be a bigger more flag-day sort of patch.</p>
<p dir="ltr">We really need to do something here and what you've done is a pretty clever way to handle the problem. I'll have to give it a bit more thought before I'll whole-heartedly endorse it, but at first blush it looks pretty good.</p>
<p dir="ltr">A few minor comments below.</p>
<p dir="ltr">> BR,<br>
> -R<br>
><br>
> src/glsl/nir/nir.h | 48 +++++++++++-<br>
> src/glsl/nir/nir_intrinsics.c | 11 ++-<br>
> src/glsl/nir/nir_intrinsics.h | 178 +++++++++++++++++++++---------------------<br>
> src/glsl/nir/nir_print.c | 30 ++++---<br>
> 4 files changed, 166 insertions(+), 101 deletions(-)<br>
><br>
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h<br>
> index bedcc0d..2235154 100644<br>
> --- a/src/glsl/nir/nir.h<br>
> +++ b/src/glsl/nir/nir.h<br>
> @@ -786,7 +786,7 @@ typedef struct {<br>
> } nir_call_instr;<br>
><br>
> #define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \<br>
> - num_variables, num_indices, flags) \<br>
> + num_variables, num_indices, idx0, idx1, idx2, flags) \<br>
> nir_intrinsic_##name,<br>
><br>
> #define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,<br>
> @@ -799,6 +799,8 @@ typedef enum {<br>
> #undef INTRINSIC<br>
> #undef LAST_INTRINSIC<br>
><br>
> +#define NIR_INTRINSIC_MAX_CONST_INDEX 3<br>
> +<br>
> /** Represents an intrinsic<br>
> *<br>
> * An intrinsic is an instruction type for handling things that are<br>
> @@ -842,7 +844,7 @@ typedef struct {<br>
> */<br>
> uint8_t num_components;<br>
><br>
> - int const_index[3];<br>
> + int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];<br>
><br>
> nir_deref_var *variables[2];<br>
><br>
> @@ -871,6 +873,29 @@ typedef enum {<br>
> NIR_INTRINSIC_CAN_REORDER = (1 << 1),<br>
> } nir_intrinsic_semantic_flag;<br>
><br>
> +/**<br>
> + * \name NIR intrinsics const-index flag<br>
> + *<br>
> + * Indicates the usage of a const_index slot.<br>
> + *<br>
> + * \sa nir_intrinsic_info::index_map<br>
> + */<br>
> +typedef enum {<br>
> + /**<br>
> + * Generally instructions that take a offset src argument, can encode<br>
> + * a constant 'base' value which is added to the offset.<br>
> + */<br>
> + NIR_INTRINSIC_BASE = 1,<br>
> +<br>
> + /**<br>
> + * For store instructions, a writemask for the store.<br>
> + */<br>
> + NIR_INTRINSIC_WRMASK = 2,<br>
> +<br>
> + NIR_INTRINSIC_NUM_INDEX_FLAGS,<br>
> +<br>
> +} nir_intrinsic_index_flag;<br>
> +<br>
> #define NIR_INTRINSIC_MAX_INPUTS 4<br>
><br>
> typedef struct {<br>
> @@ -900,12 +925,31 @@ typedef struct {<br>
> /** the number of constant indices used by the intrinsic */<br>
> unsigned num_indices;<br>
><br>
> + /** indicates the usage of intr->const_index[n] */<br>
> + unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];<br>
> +<br>
> /** semantic flags for calls to this intrinsic */<br>
> nir_intrinsic_semantic_flag flags;<br>
> } nir_intrinsic_info;<br>
><br>
> extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];<br>
><br>
> +static inline unsigned<br>
> +nir_intrinsic_write_mask(nir_intrinsic_instr *instr)<br>
> +{<br>
> + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];<br>
> + assert(info->index_map[NIR_INTRINSIC_WRMASK] > 0);<br>
> + return instr->const_index[info->index_map[NIR_INTRINSIC_WRMASK] - 1];<br>
> +}<br>
> +<br>
> +static inline int<br>
> +nir_intrinsic_base(nir_intrinsic_instr *instr)<br>
> +{<br>
> + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];<br>
> + assert(info->index_map[NIR_INTRINSIC_BASE] > 0);<br>
> + return instr->const_index[info->index_map[NIR_INTRINSIC_BASE] - 1];<br>
> +}<br>
> +<br>
> /**<br>
> * \group texture information<br>
> *<br>
> diff --git a/src/glsl/nir/nir_intrinsics.c b/src/glsl/nir/nir_intrinsics.c<br>
> index a7c868c..7dddc70 100644<br>
> --- a/src/glsl/nir/nir_intrinsics.c<br>
> +++ b/src/glsl/nir/nir_intrinsics.c<br>
> @@ -30,7 +30,8 @@<br>
> #define OPCODE(name) nir_intrinsic_##name<br>
><br>
> #define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \<br>
> - _dest_components, _num_variables, _num_indices, _flags) \<br>
> + _dest_components, _num_variables, _num_indices, \<br>
> + idx0, idx1, idx2, _flags) \<br>
> { \<br>
> .name = #_name, \<br>
> .num_srcs = _num_srcs, \<br>
> @@ -39,9 +40,15 @@<br>
> .dest_components = _dest_components, \<br>
> .num_variables = _num_variables, \<br>
> .num_indices = _num_indices, \<br>
> - .flags = _flags \<br>
> + .index_map = { \<br>
> + [NIR_INTRINSIC_ ## idx0] = 1, \<br>
> + [NIR_INTRINSIC_ ## idx1] = 2, \<br>
> + [NIR_INTRINSIC_ ## idx2] = 3, \<br>
> + }, \<br>
> },<br>
><br>
> +#define NIR_INTRINSIC_xx 0<br>
> +<br>
> #define LAST_INTRINSIC(name)<br>
><br>
> const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = {<br>
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h<br>
> index 62eead4..fd46692 100644<br>
> --- a/src/glsl/nir/nir_intrinsics.h<br>
> +++ b/src/glsl/nir/nir_intrinsics.h<br>
> @@ -30,7 +30,7 @@<br>
> * expands to a list of macros of the form:<br>
> *<br>
> * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,<br>
> - * num_variables, num_indices, flags)<br>
> + * num_variables, num_indices, idx0, idx1, idx2, flags)<br>
> *<br>
> * Which should correspond one-to-one with the nir_intrinsic_info structure. It<br>
> * is included in both ir.h to create the nir_intrinsic enum (with members of<br>
> @@ -42,9 +42,9 @@<br>
> #define ARR(...) { __VA_ARGS__ }<br>
><br>
><br>
> -INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> -INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)<br>
> -INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)<br>
> +INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)<br>
> +INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)<br>
><br>
> /*<br>
> * Interpolation of input. The interp_var_at* intrinsics are similar to the<br>
> @@ -54,25 +54,25 @@ INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)<br>
> * respectively.<br>
> */<br>
><br>
> -INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,<br>
> +INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, xx, xx, xx,<br>
> NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> -INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,<br>
> +INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, xx, xx, xx,<br>
> NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> -INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,<br>
> +INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, xx, xx, xx,<br>
> NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
><br>
> /*<br>
> * Ask the driver for the size of a given buffer. It takes the buffer index<br>
> * as source.<br>
> */<br>
> -INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,<br>
> +INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,<br>
> NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
><br>
> /*<br>
> * a barrier is an intrinsic with no inputs/outputs but which can't be moved<br>
> * around/optimized in general<br>
> */<br>
> -#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)<br>
> +#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)<br>
><br>
> BARRIER(barrier)<br>
> BARRIER(discard)<br>
> @@ -89,7 +89,7 @@ BARRIER(memory_barrier)<br>
> * The latter can be used as code motion barrier, which is currently not<br>
> * feasible with NIR.<br>
> */<br>
> -INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> +INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)<br>
><br>
> /*<br>
> * Memory barrier with semantics analogous to the compute shader<br>
> @@ -103,7 +103,7 @@ BARRIER(memory_barrier_image)<br>
> BARRIER(memory_barrier_shared)<br>
><br>
> /** A conditional discard, with a single boolean source. */<br>
> -INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)<br>
> +INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)<br>
><br>
> /**<br>
> * Basic Geometry Shader intrinsics.<br>
> @@ -113,8 +113,9 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)<br>
> *<br>
> * end_primitive implements GLSL's EndPrimitive() built-in.<br>
> */<br>
> -INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0)<br>
> -INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)<br>
> +// ???</p>
<p dir="ltr">The const index here is the TF stream ID.</p>
<p dir="ltr">> +INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, WRMASK, xx, xx, 0)<br>
> +INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, WRMASK, xx, xx, 0)<br>
><br>
> /**<br>
> * Geometry Shader intrinsics with a vertex count.<br>
> @@ -125,9 +126,9 @@ INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)<br>
> * These maintain a count of the number of vertices emitted, as an additional<br>
> * unsigned integer source.<br>
> */<br>
> -INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)<br>
> -INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)<br>
> -INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)<br>
> +INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, WRMASK, xx, xx, 0)<br>
> +INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, WRMASK, xx, xx, 0)<br>
> +INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)<br>
><br>
> /*<br>
> * Atomic counters<br>
> @@ -137,8 +138,8 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)<br>
> */<br>
><br>
> #define ATOMIC(name, flags) \<br>
> - INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \<br>
> - INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)<br>
> + INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \<br>
> + INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)<br>
><br>
> ATOMIC(inc, 0)<br>
> ATOMIC(dec, 0)<br>
> @@ -159,20 +160,20 @@ ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> * either one or two additional scalar arguments with the same meaning as in<br>
> * the ARB_shader_image_load_store specification.<br>
> */<br>
> -INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0,<br>
> +INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, xx, xx, xx,<br>
> NIR_INTRINSIC_CAN_ELIMINATE)<br>
> -INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0)<br>
> -INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)<br>
> -INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)<br>
> -INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)<br>
> -INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)<br>
> -INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)<br>
> -INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)<br>
> -INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)<br>
> -INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)<br>
> -INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,<br>
> +INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,<br>
> NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> -INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,<br>
> +INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,<br>
> NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
><br>
> /*<br>
> @@ -192,16 +193,16 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,<br>
> * in ssbo_atomic_add, etc).<br>
> * 3: For CompSwap only: the second data parameter.<br>
> */<br>
> -INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)<br>
> +INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
><br>
> /*<br>
> * CS shared variable atomic intrinsics<br>
> @@ -219,42 +220,43 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)<br>
> * in shared_atomic_add, etc).<br>
> * 2: For CompSwap only: the second data parameter.<br>
> */<br>
> -INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)<br>
> -INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)<br>
> +INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
> +INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)<br>
><br>
> -#define SYSTEM_VALUE(name, components, num_indices) \<br>
> +#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \<br>
> INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \<br>
> + idx0, idx1, idx2, \<br>
> NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
><br>
> -SYSTEM_VALUE(front_face, 1, 0)<br>
> -SYSTEM_VALUE(vertex_id, 1, 0)<br>
> -SYSTEM_VALUE(vertex_id_zero_base, 1, 0)<br>
> -SYSTEM_VALUE(base_vertex, 1, 0)<br>
> -SYSTEM_VALUE(instance_id, 1, 0)<br>
> -SYSTEM_VALUE(base_instance, 1, 0)<br>
> -SYSTEM_VALUE(draw_id, 1, 0)<br>
> -SYSTEM_VALUE(sample_id, 1, 0)<br>
> -SYSTEM_VALUE(sample_pos, 2, 0)<br>
> -SYSTEM_VALUE(sample_mask_in, 1, 0)<br>
> -SYSTEM_VALUE(primitive_id, 1, 0)<br>
> -SYSTEM_VALUE(invocation_id, 1, 0)<br>
> -SYSTEM_VALUE(tess_coord, 3, 0)<br>
> -SYSTEM_VALUE(tess_level_outer, 4, 0)<br>
> -SYSTEM_VALUE(tess_level_inner, 2, 0)<br>
> -SYSTEM_VALUE(patch_vertices_in, 1, 0)<br>
> -SYSTEM_VALUE(local_invocation_id, 3, 0)<br>
> -SYSTEM_VALUE(work_group_id, 3, 0)<br>
> -SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */<br>
> -SYSTEM_VALUE(num_work_groups, 3, 0)<br>
> -SYSTEM_VALUE(helper_invocation, 1, 0)<br>
> +SYSTEM_VALUE(front_face, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(vertex_id, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(vertex_id_zero_base, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(base_vertex, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(instance_id, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(base_instance, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(draw_id, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(sample_id, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(sample_pos, 2, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(sample_mask_in, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(primitive_id, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(invocation_id, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(tess_coord, 3, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(tess_level_outer, 4, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(tess_level_inner, 2, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(patch_vertices_in, 1, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(local_invocation_id, 3, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(user_clip_plane, 4, 1, BASE, xx, xx) /* const_index[0] is user_clip_plane[idx] */</p>
<p dir="ltr">I'd rather give this its own name.</p>
<p dir="ltr">> +SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)<br>
> +SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)<br>
><br>
> /*<br>
> * Load operations pull data from some piece of GPU memory. All load<br>
> @@ -274,25 +276,25 @@ SYSTEM_VALUE(helper_invocation, 1, 0)<br>
> * offsets are always in bytes.<br>
> */<br>
><br>
> -#define LOAD(name, srcs, indices, flags) \<br>
> - INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)<br>
> +#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \<br>
> + INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)<br>
><br>
> /* src[] = { offset }. const_index[] = { base } */<br>
> -LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> +LOAD(uniform, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> /* src[] = { buffer_index, offset }. No const_index */<br>
> -LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> +LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> /* src[] = { offset }. const_index[] = { base } */<br>
> -LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> +LOAD(input, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> /* src[] = { vertex, offset }. const_index[] = { base } */<br>
> -LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> +LOAD(per_vertex_input, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
> /* src[] = { buffer_index, offset }. No const_index */<br>
> -LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> +LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> /* src[] = { offset }. const_index[] = { base } */<br>
> -LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> +LOAD(output, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> /* src[] = { vertex, offset }. const_index[] = { base } */<br>
> -LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> +LOAD(per_vertex_output, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> /* src[] = { offset }. const_index[] = { base } */<br>
> -LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> +LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)<br>
><br>
> /*<br>
> * Stores work the same way as loads, except now the first source is the value<br>
> @@ -301,16 +303,16 @@ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> * const_index[0].<br>
> */<br>
><br>
> -#define STORE(name, srcs, indices, flags) \<br>
> - INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)<br>
> +#define STORE(name, srcs, num_indices, idx0, idx1, idx2, flags) \<br>
> + INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, num_indices, idx0, idx1, idx2, flags)<br>
><br>
> /* src[] = { value, offset }. const_index[] = { base, write_mask } */<br>
> -STORE(output, 2, 2, 0)<br>
> +STORE(output, 2, 2, BASE, WRMASK, xx, 0)<br>
> /* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */<br>
> -STORE(per_vertex_output, 3, 2, 0)<br>
> +STORE(per_vertex_output, 3, 2, BASE, WRMASK, xx, 0)<br>
> /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */<br>
> -STORE(ssbo, 3, 1, 0)<br>
> +STORE(ssbo, 3, 1, WRMASK, xx, xx, 0)<br>
> /* src[] = { value, offset }. const_index[] = { base, write_mask } */<br>
> -STORE(shared, 2, 2, 0)<br>
> +STORE(shared, 2, 2, BASE, WRMASK, xx, 0)<br>
><br>
> LAST_INTRINSIC(store_shared)<br>
> diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c<br>
> index 80638ed..96caf0a 100644<br>
> --- a/src/glsl/nir/nir_print.c<br>
> +++ b/src/glsl/nir/nir_print.c<br>
> @@ -444,15 +444,16 @@ print_deref(nir_deref_var *deref, print_state *state)<br>
> static void<br>
> print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)<br>
> {<br>
> - unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;<br>
> + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];<br>
> + unsigned num_srcs = info->num_srcs;<br>
> FILE *fp = state->fp;<br>
><br>
> - if (nir_intrinsic_infos[instr->intrinsic].has_dest) {<br>
> + if (info->has_dest) {<br>
> print_dest(&instr->dest, state);<br>
> fprintf(fp, " = ");<br>
> }<br>
><br>
> - fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name);<br>
> + fprintf(fp, "intrinsic %s (", info->name);<br>
><br>
> for (unsigned i = 0; i < num_srcs; i++) {<br>
> if (i != 0)<br>
> @@ -463,9 +464,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)<br>
><br>
> fprintf(fp, ") (");<br>
><br>
> - unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;<br>
> -<br>
> - for (unsigned i = 0; i < num_vars; i++) {<br>
> + for (unsigned i = 0; i < info->num_variables; i++) {<br>
> if (i != 0)<br>
> fprintf(fp, ", ");<br>
><br>
> @@ -474,9 +473,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)<br>
><br>
> fprintf(fp, ") (");<br>
><br>
> - unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices;<br>
> -<br>
> - for (unsigned i = 0; i < num_indices; i++) {<br>
> + for (unsigned i = 0; i < info->num_indices; i++) {<br>
> if (i != 0)<br>
> fprintf(fp, ", ");<br>
><br>
> @@ -485,6 +482,21 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)<br>
><br>
> fprintf(fp, ")");<br>
><br>
> + if (info->index_map[NIR_INTRINSIC_BASE] ||<br>
> + info->index_map[NIR_INTRINSIC_WRMASK]) {<br>
> + fprintf(fp, " /*");<br>
> + if (info->index_map[NIR_INTRINSIC_BASE])<br>
> + fprintf(fp, " base=%d", nir_intrinsic_base(instr));<br>
> + if (info->index_map[NIR_INTRINSIC_WRMASK] & NIR_INTRINSIC_WRMASK) {<br>
> + unsigned wrmask = nir_intrinsic_write_mask(instr);<br>
> + fprintf(fp, " wrmask=");<br>
> + for (unsigned i = 0; i < 4; i++)<br>
> + if ((wrmask >> i) & 1)<br>
> + fprintf(fp, "%c", "xyzw"[i]);<br>
> + }<br>
> + fprintf(fp, " */");<br>
> + }<br>
> +<br>
> if (!state->shader)<br>
> return;<br>
><br>
> --<br>
> 2.5.0<br>
><br>
</p>