<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Mon, Jul 16, 2018 at 7:29 AM Karol Herbst <<a href="mailto:kherbst@redhat.com">kherbst@redhat.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">With OpenCL the size of some system value depends on the Physical model<br>
choosen, so we need a way to load any system value as 32 or 64 bit.<br>
<br>
We could probably be a lot smarter and specify which system values might<br>
be valid as 32 and/or 64 bit, but I get the feeling it isn't really worth<br>
the effort and we can simply depend on the dest type choosen by the API.<br></blockquote><div><br></div><div>I'm not sure what I think about this. Most system values (other than the few used by OpenCL) are always 32 bits all the time and back-ends are likely to depend on this. At the very least, it might be nice to have some sort of validation that the bit sizes are correct before we get there.</div><div><br></div><div>One solution to this would be to add a dest_bit_size field to nir_intrinsic_info and use the convention of dest_bit_size == 0 means it can be anything. Then nir_builder_opcodes.py can use that to create functions which either require the bit size or don't. Also, we could have the validator properly validate intrinsic destination bit sizes. It's also something that we could do somewhat incramentally because defaulting everything to dest_bit_size = 0 gives the current behavior.</div><div><br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Signed-off-by: Karol Herbst <<a href="mailto:kherbst@redhat.com" target="_blank">kherbst@redhat.com</a>><br>
---<br>
src/compiler/nir/nir_builder_opcodes_h.py | 9 ++--<br>
src/compiler/nir/nir_lower_alpha_test.c | 2 +-<br>
src/compiler/nir/nir_lower_clip.c | 3 +-<br>
src/compiler/nir/nir_lower_subgroups.c | 8 +--<br>
src/compiler/nir/nir_lower_system_values.c | 49 +++++++++++--------<br>
src/compiler/nir/nir_lower_two_sided_color.c | 2 +-<br>
src/compiler/nir/nir_lower_wpos_center.c | 2 +-<br>
src/compiler/spirv/vtn_subgroup.c | 2 +-<br>
src/gallium/auxiliary/nir/tgsi_to_nir.c | 3 +-<br>
src/intel/blorp/blorp_blit.c | 2 +-<br>
src/intel/blorp/blorp_clear.c | 2 +-<br>
.../compiler/brw_nir_lower_cs_intrinsics.c | 6 +--<br>
src/mesa/drivers/dri/i965/brw_tcs.c | 2 +-<br>
13 files changed, 52 insertions(+), 40 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir_builder_opcodes_h.py b/src/compiler/nir/nir_builder_opcodes_h.py<br>
index 72cf5b4549d..d16dac6b16e 100644<br>
--- a/src/compiler/nir/nir_builder_opcodes_h.py<br>
+++ b/src/compiler/nir/nir_builder_opcodes_h.py<br>
@@ -44,22 +44,23 @@ nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})<br>
<br>
/* Generic builder for system values. */<br>
static inline nir_ssa_def *<br>
-nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)<br>
+nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,<br>
+ unsigned bit_size)<br>
{<br>
nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);<br>
load->num_components = nir_intrinsic_infos[op].dest_components;<br>
load->const_index[0] = index;<br>
nir_ssa_dest_init(&load->instr, &load->dest,<br>
- nir_intrinsic_infos[op].dest_components, 32, NULL);<br>
+ nir_intrinsic_infos[op].dest_components, bit_size, NULL);<br>
nir_builder_instr_insert(build, &load->instr);<br>
return &load->dest.ssa;<br>
}<br>
<br>
% for name, opcode in filter(lambda v: v[1].sysval, sorted(INTR_OPCODES.iteritems())):<br>
static inline nir_ssa_def *<br>
-nir_${name}(nir_builder *build)<br>
+nir_${name}(nir_builder *build, unsigned bit_size)<br>
{<br>
- return nir_load_system_value(build, nir_intrinsic_${name}, 0);<br>
+ return nir_load_system_value(build, nir_intrinsic_${name}, 0, bit_size);<br>
}<br>
% endfor<br>
<br>
diff --git a/src/compiler/nir/nir_lower_alpha_test.c b/src/compiler/nir/nir_lower_alpha_test.c<br>
index ddd815765bd..8341a0246d2 100644<br>
--- a/src/compiler/nir/nir_lower_alpha_test.c<br>
+++ b/src/compiler/nir/nir_lower_alpha_test.c<br>
@@ -95,7 +95,7 @@ nir_lower_alpha_test(nir_shader *shader, enum compare_func func,<br>
<br>
nir_ssa_def *condition =<br>
nir_compare_func(&b, func,<br>
- alpha, nir_load_alpha_ref_float(&b));<br>
+ alpha, nir_load_alpha_ref_float(&b, 32));<br>
<br>
nir_intrinsic_instr *discard =<br>
nir_intrinsic_instr_create(b.shader,<br>
diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c<br>
index ea12f51a7bb..b9a91f7d40b 100644<br>
--- a/src/compiler/nir/nir_lower_clip.c<br>
+++ b/src/compiler/nir/nir_lower_clip.c<br>
@@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,<br>
for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {<br>
if (ucp_enables & (1 << plane)) {<br>
nir_ssa_def *ucp =<br>
- nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, plane);<br>
+ nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane,<br>
+ plane, 32);<br>
<br>
/* calculate clipdist[plane] - dot(ucp, cv): */<br>
clipdist[plane] = nir_fdot4(&b, ucp, cv);<br>
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c<br>
index ee5e8bd644b..c474b9fd27a 100644<br>
--- a/src/compiler/nir/nir_lower_subgroups.c<br>
+++ b/src/compiler/nir/nir_lower_subgroups.c<br>
@@ -226,7 +226,7 @@ static nir_ssa_def *<br>
lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,<br>
bool lower_to_scalar, bool lower_to_32bit)<br>
{<br>
- nir_ssa_def *index = nir_load_subgroup_invocation(b);<br>
+ nir_ssa_def *index = nir_load_subgroup_invocation(b, 32);<br>
switch (intrin->intrinsic) {<br>
case nir_intrinsic_shuffle_xor:<br>
assert(intrin->src[1].is_ssa);<br>
@@ -338,7 +338,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,<br>
assert(options->subgroup_size <= 64);<br>
uint64_t group_mask = ~0ull >> (64 - options->subgroup_size);<br>
<br>
- nir_ssa_def *count = nir_load_subgroup_invocation(b);<br>
+ nir_ssa_def *count = nir_load_subgroup_invocation(b, 32);<br>
nir_ssa_def *val;<br>
switch (intrin->intrinsic) {<br>
case nir_intrinsic_load_subgroup_eq_mask:<br>
@@ -411,7 +411,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,<br>
<br>
case nir_intrinsic_ballot_bit_count_exclusive:<br>
case nir_intrinsic_ballot_bit_count_inclusive: {<br>
- nir_ssa_def *count = nir_load_subgroup_invocation(b);<br>
+ nir_ssa_def *count = nir_load_subgroup_invocation(b, 32);<br>
nir_ssa_def *mask = nir_imm_intN_t(b, ~0ull, options->ballot_bit_size);<br>
if (intrin->intrinsic == nir_intrinsic_ballot_bit_count_inclusive) {<br>
const unsigned bits = options->ballot_bit_size;<br>
@@ -434,7 +434,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,<br>
nir_ssa_dest_init(&first->instr, &first->dest, 1, 32, NULL);<br>
nir_builder_instr_insert(b, &first->instr);<br>
<br>
- return nir_ieq(b, nir_load_subgroup_invocation(b), &first->dest.ssa);<br>
+ return nir_ieq(b, nir_load_subgroup_invocation(b, 32), &first->dest.ssa);<br>
}<br>
<br>
case nir_intrinsic_shuffle:<br>
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c<br>
index da04895d66c..41f939dd935 100644<br>
--- a/src/compiler/nir/nir_lower_system_values.c<br>
+++ b/src/compiler/nir/nir_lower_system_values.c<br>
@@ -29,14 +29,22 @@<br>
#include "nir_builder.h"<br>
<br>
static nir_ssa_def*<br>
-build_local_group_size(nir_builder *b)<br>
+build_local_group_size(nir_builder *b, unsigned bit_size)<br>
{<br>
nir_const_value local_size;<br>
memset(&local_size, 0, sizeof(local_size));<br>
- local_size.u32[0] = b->shader->info.cs.local_size[0];<br>
- local_size.u32[1] = b->shader->info.cs.local_size[1];<br>
- local_size.u32[2] = b->shader->info.cs.local_size[2];<br>
- return nir_build_imm(b, 3, 32, local_size);<br>
+ if (bit_size == 64) {<br>
+ local_size.u64[0] = b->shader->info.cs.local_size[0];<br>
+ local_size.u64[1] = b->shader->info.cs.local_size[1];<br>
+ local_size.u64[2] = b->shader->info.cs.local_size[2];<br>
+ } else if (bit_size == 32) {<br>
+ local_size.u32[0] = b->shader->info.cs.local_size[0];<br>
+ local_size.u32[1] = b->shader->info.cs.local_size[1];<br>
+ local_size.u32[2] = b->shader->info.cs.local_size[2];<br>
+ } else {<br>
+ assert(!"local group size can't be smaller than 32 bits");<br>
+ }<br>
+ return nir_build_imm(b, 3, bit_size, local_size);<br>
}<br>
<br>
static bool<br>
@@ -67,6 +75,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
}<br>
nir_variable *var = deref->var;<br>
<br>
+ unsigned bit_size = load_deref->dest.ssa.bit_size;<br>
b->cursor = nir_after_instr(&load_deref->instr);<br>
<br>
nir_ssa_def *sysval = NULL;<br>
@@ -77,9 +86,9 @@ convert_block(nir_block *block, nir_builder *b)<br>
* "The value of gl_GlobalInvocationID is equal to<br>
* gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"<br>
*/<br>
- nir_ssa_def *group_size = build_local_group_size(b);<br>
- nir_ssa_def *group_id = nir_load_work_group_id(b);<br>
- nir_ssa_def *local_id = nir_load_local_invocation_id(b);<br>
+ nir_ssa_def *group_size = build_local_group_size(b, bit_size);<br>
+ nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);<br>
+ nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size);<br>
<br>
sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);<br>
break;<br>
@@ -99,7 +108,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
* gl_WorkGroupSize.y + gl_LocalInvocationID.y *<br>
* gl_WorkGroupSize.x + gl_LocalInvocationID.x"<br>
*/<br>
- nir_ssa_def *local_id = nir_load_local_invocation_id(b);<br>
+ nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size);<br>
<br>
nir_ssa_def *size_x =<br>
nir_imm_int(b, b->shader->info.cs.local_size[0]);<br>
@@ -115,17 +124,17 @@ convert_block(nir_block *block, nir_builder *b)<br>
}<br>
<br>
case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {<br>
- sysval = build_local_group_size(b);<br>
+ sysval = build_local_group_size(b, bit_size);<br>
break;<br>
}<br>
<br>
case SYSTEM_VALUE_VERTEX_ID:<br>
if (b->shader->options->vertex_id_zero_based) {<br>
sysval = nir_iadd(b,<br>
- nir_load_vertex_id_zero_base(b),<br>
- nir_load_first_vertex(b));<br>
+ nir_load_vertex_id_zero_base(b, bit_size),<br>
+ nir_load_first_vertex(b, bit_size));<br>
} else {<br>
- sysval = nir_load_vertex_id(b);<br>
+ sysval = nir_load_vertex_id(b, bit_size);<br>
}<br>
break;<br>
<br>
@@ -140,14 +149,14 @@ convert_block(nir_block *block, nir_builder *b)<br>
*/<br>
if (b->shader->options->lower_base_vertex)<br>
sysval = nir_iand(b,<br>
- nir_load_is_indexed_draw(b),<br>
- nir_load_first_vertex(b));<br>
+ nir_load_is_indexed_draw(b, bit_size),<br>
+ nir_load_first_vertex(b, bit_size));<br>
break;<br>
<br>
case SYSTEM_VALUE_INSTANCE_INDEX:<br>
sysval = nir_iadd(b,<br>
- nir_load_instance_id(b),<br>
- nir_load_base_instance(b));<br>
+ nir_load_instance_id(b, bit_size),<br>
+ nir_load_base_instance(b, bit_size));<br>
break;<br>
<br>
case SYSTEM_VALUE_SUBGROUP_EQ_MASK:<br>
@@ -172,8 +181,8 @@ convert_block(nir_block *block, nir_builder *b)<br>
break;<br>
<br>
case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: {<br>
- nir_ssa_def *group_size = build_local_group_size(b);<br>
- nir_ssa_def *num_work_groups = nir_load_num_work_groups(b);<br>
+ nir_ssa_def *group_size = nir_load_local_group_size(b, bit_size);<br>
+ nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size);<br>
sysval = nir_imul(b, group_size, num_work_groups);<br>
break;<br>
}<br>
@@ -185,7 +194,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
if (sysval == NULL) {<br>
nir_intrinsic_op sysval_op =<br>
nir_intrinsic_from_system_value(var->data.location);<br>
- sysval = nir_load_system_value(b, sysval_op, 0);<br>
+ sysval = nir_load_system_value(b, sysval_op, 0, bit_size);<br>
}<br>
<br>
nir_ssa_def_rewrite_uses(&load_deref->dest.ssa, nir_src_for_ssa(sysval));<br>
diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c<br>
index b6742ab2462..20af88b6aec 100644<br>
--- a/src/compiler/nir/nir_lower_two_sided_color.c<br>
+++ b/src/compiler/nir/nir_lower_two_sided_color.c<br>
@@ -158,7 +158,7 @@ nir_lower_two_sided_color_block(nir_block *block,<br>
* bcsel(load_system_value(FACE), load_input(COLn), load_input(BFCn))<br>
*/<br>
b->cursor = nir_before_instr(&intr->instr);<br>
- nir_ssa_def *face = nir_load_front_face(b);<br>
+ nir_ssa_def *face = nir_load_front_face(b, 32);<br>
nir_ssa_def *front = load_input(b, state->colors[idx].front);<br>
nir_ssa_def *back = load_input(b, state->colors[idx].back);<br>
nir_ssa_def *color = nir_bcsel(b, face, front, back);<br>
diff --git a/src/compiler/nir/nir_lower_wpos_center.c b/src/compiler/nir/nir_lower_wpos_center.c<br>
index b6f3529c766..f2151244e17 100644<br>
--- a/src/compiler/nir/nir_lower_wpos_center.c<br>
+++ b/src/compiler/nir/nir_lower_wpos_center.c<br>
@@ -58,7 +58,7 @@ update_fragcoord(nir_builder *b, nir_intrinsic_instr *intr,<br>
wpos = nir_fadd(b, wpos, nir_imm_vec4(b, 0.5f, 0.5f, 0.0f, 0.0f));<br>
} else {<br>
nir_ssa_def *spos =<br>
- nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0);<br>
+ nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0, 32);<br>
<br>
wpos = nir_fadd(b, wpos,<br>
nir_vec4(b,<br>
diff --git a/src/compiler/spirv/vtn_subgroup.c b/src/compiler/spirv/vtn_subgroup.c<br>
index ecec3aa62d0..d71ae5284e4 100644<br>
--- a/src/compiler/spirv/vtn_subgroup.c<br>
+++ b/src/compiler/spirv/vtn_subgroup.c<br>
@@ -110,7 +110,7 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,<br>
nir_intrinsic_ballot_bitfield_extract);<br>
<br>
intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);<br>
- intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));<br>
+ intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb, 32));<br>
<br>
nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 32, NULL);<br>
nir_builder_instr_insert(&b->nb, &intrin->instr);<br>
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c<br>
index 1b31b564246..a4e46c54b4e 100644<br>
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c<br>
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c<br>
@@ -591,7 +591,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,<br>
nir_ssa_def *tgsi_frontface[4] = {<br>
nir_bcsel(&c->build,<br>
nir_load_system_value(&c->build,<br>
- nir_intrinsic_load_front_face, 0),<br>
+ nir_intrinsic_load_front_face,<br>
+ 0, 32),<br>
nir_imm_float(&c->build, 1.0),<br>
nir_imm_float(&c->build, -1.0)),<br>
nir_imm_float(&c->build, 0.0),<br>
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c<br>
index f719aac1b86..1dc0f5f25e8 100644<br>
--- a/src/intel/blorp/blorp_blit.c<br>
+++ b/src/intel/blorp/blorp_blit.c<br>
@@ -116,7 +116,7 @@ blorp_blit_get_frag_coords(nir_builder *b,<br>
<br>
if (key->persample_msaa_dispatch) {<br>
return nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),<br>
- nir_load_sample_id(b));<br>
+ nir_load_sample_id(b, 32));<br>
} else {<br>
return nir_vec2(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1));<br>
}<br>
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c<br>
index b4c744020d9..a377fb5a212 100644<br>
--- a/src/intel/blorp/blorp_clear.c<br>
+++ b/src/intel/blorp/blorp_clear.c<br>
@@ -967,7 +967,7 @@ blorp_params_get_mcs_partial_resolve_kernel(struct blorp_context *blorp,<br>
/* Do an MCS fetch and check if it is equal to the magic clear value */<br>
nir_ssa_def *mcs =<br>
blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, blorp_nir_frag_coord(&b)),<br>
- nir_load_layer_id(&b));<br>
+ nir_load_layer_id(&b, 32));<br>
nir_ssa_def *is_clear =<br>
blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples);<br>
<br>
diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
index bfbdea0e8fa..846e82ffdf9 100644<br>
--- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
+++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c<br>
@@ -61,11 +61,11 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,<br>
if (state->local_workgroup_size <= state->dispatch_width)<br>
subgroup_id = nir_imm_int(b, 0);<br>
else<br>
- subgroup_id = nir_load_subgroup_id(b);<br>
+ subgroup_id = nir_load_subgroup_id(b, 32);<br>
<br>
nir_ssa_def *thread_local_id =<br>
nir_imul(b, subgroup_id, nir_imm_int(b, state->dispatch_width));<br>
- nir_ssa_def *channel = nir_load_subgroup_invocation(b);<br>
+ nir_ssa_def *channel = nir_load_subgroup_invocation(b, 32);<br>
sysval = nir_iadd(b, channel, thread_local_id);<br>
break;<br>
}<br>
@@ -86,7 +86,7 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,<br>
*/<br>
unsigned *size = nir->info.cs.local_size;<br>
<br>
- nir_ssa_def *local_index = nir_load_local_invocation_index(b);<br>
+ nir_ssa_def *local_index = nir_load_local_invocation_index(b, 32);<br>
<br>
nir_const_value uvec3;<br>
memset(&uvec3, 0, sizeof(uvec3));<br>
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c<br>
index 3b4642033fe..84a1d162607 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_tcs.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c<br>
@@ -48,7 +48,7 @@ create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,<br>
nir_intrinsic_instr *store;<br>
nir_ssa_def *zero = nir_imm_int(&b, 0);<br>
nir_ssa_def *invoc_id =<br>
- nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);<br>
+ nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0, 32);<br>
<br>
nir->info.inputs_read = key->outputs_written &<br>
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);<br>
-- <br>
2.17.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div>