<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst <span dir="ltr"><<a href="mailto:kherbst@redhat.com" target="_blank">kherbst@redhat.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">With OpenCL the size of some system value depends on the Physical model<br>
choosen, so we need a way to load any system value as 32 or 64 bit.<br>
<br>
Signed-off-by: Karol Herbst <<a href="mailto:kherbst@redhat.com">kherbst@redhat.com</a>><br>
---<br>
src/compiler/nir/nir_builder.h | 10 +++++---<br>
src/compiler/nir/nir_lower_<wbr>alpha_test.c | 2 +-<br>
src/compiler/nir/nir_lower_<wbr>clip.c | 3 ++-<br>
src/compiler/nir/nir_lower_<wbr>subgroups.c | 8 +++---<br>
src/compiler/nir/nir_lower_<wbr>system_values.c | 31 ++++++++++++------------<br>
src/compiler/nir/nir_lower_<wbr>two_sided_color.c | 2 +-<br>
src/compiler/nir/nir_lower_<wbr>wpos_center.c | 2 +-<br>
src/compiler/spirv/vtn_<wbr>subgroup.c | 2 +-<br>
src/gallium/auxiliary/nir/<wbr>tgsi_to_nir.c | 3 ++-<br>
src/intel/blorp/blorp_blit.c | 2 +-<br>
src/intel/blorp/blorp_clear.c | 2 +-<br>
src/intel/compiler/brw_nir_<wbr>lower_cs_intrinsics.c | 6 ++---<br>
src/mesa/drivers/dri/i965/brw_<wbr>tcs.c | 2 +-<br>
13 files changed, 40 insertions(+), 35 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir_<wbr>builder.h b/src/compiler/nir/nir_<wbr>builder.h<br>
index 36e0ae3ac63..4e93cd08169 100644<br>
--- a/src/compiler/nir/nir_<wbr>builder.h<br>
+++ b/src/compiler/nir/nir_<wbr>builder.h<br>
@@ -612,13 +612,14 @@ nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)<br>
<br>
/* Generic builder for system values. */<br>
static inline nir_ssa_def *<br>
-nir_load_system_value(nir_<wbr>builder *build, nir_intrinsic_op op, int index)<br>
+nir_load_system_value(nir_<wbr>builder *build, nir_intrinsic_op op, int index,<br>
+ unsigned bit_size)<br>
{<br>
nir_intrinsic_instr *load = nir_intrinsic_instr_create(<wbr>build->shader, op);<br>
load->num_components = nir_intrinsic_infos[op].dest_<wbr>components;<br>
load->const_index[0] = index;<br>
nir_ssa_dest_init(&load-><wbr>instr, &load->dest,<br>
- nir_intrinsic_infos[op].dest_<wbr>components, 32, NULL);<br>
+ nir_intrinsic_infos[op].dest_<wbr>components, bit_size, NULL);<br>
nir_builder_instr_insert(<wbr>build, &load->instr);<br>
return &load->dest.ssa;<br>
}<br>
@@ -630,9 +631,10 @@ nir_load_system_value(nir_<wbr>builder *build, nir_intrinsic_op op, int index)<br>
<br>
#define DEFINE_SYSTEM_VALUE(name) \<br>
static inline nir_ssa_def * \<br>
- nir_load_##name(nir_builder *build) \<br>
+ nir_load_##name(nir_builder *build, unsigned bit_size) \<br></blockquote><div><br></div><div>I was really hoping that this change wouldn't touch every single intrinsic helper. Maybe with Rob's python-based intrinsics table we can do something better.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
{ \<br>
- return nir_load_system_value(build, nir_intrinsic_load_##name, 0); \<br>
+ return nir_load_system_value(build, nir_intrinsic_load_##name, 0, \<br>
+ bit_size); \<br>
}<br>
<br>
#include "nir_intrinsics.h"<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>alpha_test.c b/src/compiler/nir/nir_lower_<wbr>alpha_test.c<br>
index 6bf9ff142df..29f91ab9428 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>alpha_test.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>alpha_test.c<br>
@@ -92,7 +92,7 @@ nir_lower_alpha_test(nir_<wbr>shader *shader, enum compare_func func,<br>
<br>
nir_ssa_def *condition =<br>
nir_compare_func(&b, func,<br>
- alpha, nir_load_alpha_ref_float(&b));<br>
+ alpha, nir_load_alpha_ref_float(&b, 32));<br>
<br>
nir_intrinsic_instr *discard =<br>
nir_intrinsic_instr_create(b.<wbr>shader,<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>clip.c b/src/compiler/nir/nir_lower_<wbr>clip.c<br>
index ea12f51a7bb..b9a91f7d40b 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>clip.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>clip.c<br>
@@ -174,7 +174,8 @@ lower_clip_vs(nir_function_<wbr>impl *impl, unsigned ucp_enables,<br>
for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {<br>
if (ucp_enables & (1 << plane)) {<br>
nir_ssa_def *ucp =<br>
- nir_load_system_value(&b, nir_intrinsic_load_user_clip_<wbr>plane, plane);<br>
+ nir_load_system_value(&b, nir_intrinsic_load_user_clip_<wbr>plane,<br>
+ plane, 32);<br>
<br>
/* calculate clipdist[plane] - dot(ucp, cv): */<br>
clipdist[plane] = nir_fdot4(&b, ucp, cv);<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>subgroups.c b/src/compiler/nir/nir_lower_<wbr>subgroups.c<br>
index 0d3c83b7951..7e910c013a9 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>subgroups.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>subgroups.c<br>
@@ -190,7 +190,7 @@ static nir_ssa_def *<br>
lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,<br>
bool lower_to_scalar)<br>
{<br>
- nir_ssa_def *index = nir_load_subgroup_invocation(<wbr>b);<br>
+ nir_ssa_def *index = nir_load_subgroup_invocation(<wbr>b, 32);<br>
switch (intrin->intrinsic) {<br>
case nir_intrinsic_shuffle_xor:<br>
assert(intrin->src[1].is_ssa);<br>
@@ -300,7 +300,7 @@ lower_subgroups_intrin(nir_<wbr>builder *b, nir_intrinsic_instr *intrin,<br>
assert(options->subgroup_size <= 64);<br>
uint64_t group_mask = ~0ull >> (64 - options->subgroup_size);<br>
<br>
- nir_ssa_def *count = nir_load_subgroup_invocation(<wbr>b);<br>
+ nir_ssa_def *count = nir_load_subgroup_invocation(<wbr>b, 32);<br>
nir_ssa_def *val;<br>
switch (intrin->intrinsic) {<br>
case nir_intrinsic_load_subgroup_<wbr>eq_mask:<br>
@@ -373,7 +373,7 @@ lower_subgroups_intrin(nir_<wbr>builder *b, nir_intrinsic_instr *intrin,<br>
<br>
case nir_intrinsic_ballot_bit_<wbr>count_exclusive:<br>
case nir_intrinsic_ballot_bit_<wbr>count_inclusive: {<br>
- nir_ssa_def *count = nir_load_subgroup_invocation(<wbr>b);<br>
+ nir_ssa_def *count = nir_load_subgroup_invocation(<wbr>b, 32);<br>
nir_ssa_def *mask = nir_imm_intN_t(b, ~0ull, options->ballot_bit_size);<br>
if (intrin->intrinsic == nir_intrinsic_ballot_bit_<wbr>count_inclusive) {<br>
const unsigned bits = options->ballot_bit_size;<br>
@@ -396,7 +396,7 @@ lower_subgroups_intrin(nir_<wbr>builder *b, nir_intrinsic_instr *intrin,<br>
nir_ssa_dest_init(&first-><wbr>instr, &first->dest, 1, 32, NULL);<br>
nir_builder_instr_insert(b, &first->instr);<br>
<br>
- return nir_ieq(b, nir_load_subgroup_invocation(<wbr>b), &first->dest.ssa);<br>
+ return nir_ieq(b, nir_load_subgroup_invocation(<wbr>b, 32), &first->dest.ssa);<br>
}<br>
<br>
case nir_intrinsic_shuffle:<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>system_values.c b/src/compiler/nir/nir_lower_<wbr>system_values.c<br>
index fb560ee21bb..d507c28f421 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>system_values.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>system_values.c<br>
@@ -46,6 +46,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
if (var->data.mode != nir_var_system_value)<br>
continue;<br>
<br>
+ unsigned bit_size = load_var->dest.ssa.bit_size;<br>
b->cursor = nir_after_instr(&load_var-><wbr>instr);<br>
<br>
nir_ssa_def *sysval = NULL;<br>
@@ -59,15 +60,15 @@ convert_block(nir_block *block, nir_builder *b)<br>
<br>
nir_const_value local_size;<br>
memset(&local_size, 0, sizeof(local_size));<br>
- local_size.u32[0] = b->shader->info.cs.local_size[<wbr>0];<br>
- local_size.u32[1] = b->shader->info.cs.local_size[<wbr>1];<br>
- local_size.u32[2] = b->shader->info.cs.local_size[<wbr>2];<br>
+ local_size.u64[0] = b->shader->info.cs.local_size[<wbr>0];<br>
+ local_size.u64[1] = b->shader->info.cs.local_size[<wbr>1];<br>
+ local_size.u64[2] = b->shader->info.cs.local_size[<wbr>2];<br>
<br>
- nir_ssa_def *group_id = nir_load_work_group_id(b);<br>
- nir_ssa_def *local_id = nir_load_local_invocation_id(<wbr>b);<br>
+ nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);<br>
+ nir_ssa_def *local_id = nir_load_local_invocation_id(<wbr>b, bit_size);<br>
<br>
sysval = nir_iadd(b, nir_imul(b, group_id,<br>
- nir_build_imm(b, 3, 32, local_size)),<br>
+ nir_build_imm(b, 3, bit_size, local_size)),<br></blockquote><div><br></div><div>This doesn't do what you think it does. Due to the way that the different arrays in nir_const_value alias, you can't put 64-bit values in the nir_const_value and then use 32 for nir_build_imm and expect it to work. We can either make a smarter immediate builder or just insert a u2u64 instruction which will get properly constant folded.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
local_id);<br>
break;<br>
}<br>
@@ -86,12 +87,12 @@ convert_block(nir_block *block, nir_builder *b)<br>
* gl_WorkGroupSize.y + gl_LocalInvocationID.y *<br>
* gl_WorkGroupSize.x + gl_LocalInvocationID.x"<br>
*/<br>
- nir_ssa_def *local_id = nir_load_local_invocation_id(<wbr>b);<br>
+ nir_ssa_def *local_id = nir_load_local_invocation_id(<wbr>b, bit_size);<br>
<br>
nir_ssa_def *size_x =<br>
- nir_imm_int(b, b->shader->info.cs.local_size[<wbr>0]);<br>
+ nir_imm_intN_t(b, b->shader->info.cs.local_size[<wbr>0], bit_size);<br>
nir_ssa_def *size_y =<br>
- nir_imm_int(b, b->shader->info.cs.local_size[<wbr>1]);<br>
+ nir_imm_intN_t(b, b->shader->info.cs.local_size[<wbr>1], bit_size);<br>
<br>
sysval = nir_imul(b, nir_channel(b, local_id, 2),<br>
nir_imul(b, size_x, size_y));<br>
@@ -104,17 +105,17 @@ convert_block(nir_block *block, nir_builder *b)<br>
case SYSTEM_VALUE_VERTEX_ID:<br>
if (b->shader->options->vertex_<wbr>id_zero_based) {<br>
sysval = nir_iadd(b,<br>
- nir_load_vertex_id_zero_base(<wbr>b),<br>
- nir_load_base_vertex(b));<br>
+ nir_load_vertex_id_zero_base(<wbr>b, bit_size),<br>
+ nir_load_base_vertex(b, bit_size));<br>
} else {<br>
- sysval = nir_load_vertex_id(b);<br>
+ sysval = nir_load_vertex_id(b, bit_size);<br>
}<br>
break;<br>
<br>
case SYSTEM_VALUE_INSTANCE_INDEX:<br>
sysval = nir_iadd(b,<br>
- nir_load_instance_id(b),<br>
- nir_load_base_instance(b));<br>
+ nir_load_instance_id(b, bit_size),<br>
+ nir_load_base_instance(b, bit_size));<br>
break;<br>
<br>
case SYSTEM_VALUE_SUBGROUP_EQ_MASK:<br>
@@ -145,7 +146,7 @@ convert_block(nir_block *block, nir_builder *b)<br>
if (sysval == NULL) {<br>
nir_intrinsic_op sysval_op =<br>
nir_intrinsic_from_system_<wbr>value(var->data.location);<br>
- sysval = nir_load_system_value(b, sysval_op, 0);<br>
+ sysval = nir_load_system_value(b, sysval_op, 0, bit_size);<br>
}<br>
<br>
nir_ssa_def_rewrite_uses(&<wbr>load_var->dest.ssa, nir_src_for_ssa(sysval));<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>two_sided_color.c b/src/compiler/nir/nir_lower_<wbr>two_sided_color.c<br>
index b6742ab2462..20af88b6aec 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>two_sided_color.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>two_sided_color.c<br>
@@ -158,7 +158,7 @@ nir_lower_two_sided_color_<wbr>block(nir_block *block,<br>
* bcsel(load_system_value(FACE), load_input(COLn), load_input(BFCn))<br>
*/<br>
b->cursor = nir_before_instr(&intr->instr)<wbr>;<br>
- nir_ssa_def *face = nir_load_front_face(b);<br>
+ nir_ssa_def *face = nir_load_front_face(b, 32);<br>
nir_ssa_def *front = load_input(b, state->colors[idx].front);<br>
nir_ssa_def *back = load_input(b, state->colors[idx].back);<br>
nir_ssa_def *color = nir_bcsel(b, face, front, back);<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>wpos_center.c b/src/compiler/nir/nir_lower_<wbr>wpos_center.c<br>
index dca810d735e..a0d9719e270 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>wpos_center.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>wpos_center.c<br>
@@ -58,7 +58,7 @@ update_fragcoord(nir_builder *b, nir_intrinsic_instr *intr,<br>
wpos = nir_fadd(b, wpos, nir_imm_vec4(b, 0.5f, 0.5f, 0.0f, 0.0f));<br>
} else {<br>
nir_ssa_def *spos =<br>
- nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0);<br>
+ nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0, 32);<br>
<br>
wpos = nir_fadd(b, wpos,<br>
nir_vec4(b,<br>
diff --git a/src/compiler/spirv/vtn_<wbr>subgroup.c b/src/compiler/spirv/vtn_<wbr>subgroup.c<br>
index bd3143962be..50a4ecc2dcc 100644<br>
--- a/src/compiler/spirv/vtn_<wbr>subgroup.c<br>
+++ b/src/compiler/spirv/vtn_<wbr>subgroup.c<br>
@@ -110,7 +110,7 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,<br>
nir_intrinsic_ballot_bitfield_<wbr>extract);<br>
<br>
intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(<wbr>b, w[4])->def);<br>
- intrin->src[1] = nir_src_for_ssa(nir_load_<wbr>subgroup_invocation(&b->nb));<br>
+ intrin->src[1] = nir_src_for_ssa(nir_load_<wbr>subgroup_invocation(&b->nb, 32));<br>
<br>
nir_ssa_dest_init(&intrin-><wbr>instr, &intrin->dest, 1, 32, NULL);<br>
nir_builder_instr_insert(&b-><wbr>nb, &intrin->instr);<br>
diff --git a/src/gallium/auxiliary/nir/<wbr>tgsi_to_nir.c b/src/gallium/auxiliary/nir/<wbr>tgsi_to_nir.c<br>
index f8df4c10137..852b24eaaf1 100644<br>
--- a/src/gallium/auxiliary/nir/<wbr>tgsi_to_nir.c<br>
+++ b/src/gallium/auxiliary/nir/<wbr>tgsi_to_nir.c<br>
@@ -610,7 +610,8 @@ ttn_src_for_file_and_index(<wbr>struct ttn_compile *c, unsigned file, unsigned index,<br>
nir_ssa_def *tgsi_frontface[4] = {<br>
nir_bcsel(&c->build,<br>
nir_load_system_value(&c-><wbr>build,<br>
- nir_intrinsic_load_front_face, 0),<br>
+ nir_intrinsic_load_front_face,<br>
+ 0, 32),<br>
nir_imm_float(&c->build, 1.0),<br>
nir_imm_float(&c->build, -1.0)),<br>
nir_imm_float(&c->build, 0.0),<br>
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c<br>
index 0757db0d04b..ca70734981a 100644<br>
--- a/src/intel/blorp/blorp_blit.c<br>
+++ b/src/intel/blorp/blorp_blit.c<br>
@@ -114,7 +114,7 @@ blorp_blit_get_frag_coords(<wbr>nir_builder *b,<br>
<br>
if (key->persample_msaa_dispatch) {<br>
return nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),<br>
- nir_load_sample_id(b));<br>
+ nir_load_sample_id(b, 32));<br>
} else {<br>
return nir_vec2(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1));<br>
}<br>
diff --git a/src/intel/blorp/blorp_clear.<wbr>c b/src/intel/blorp/blorp_clear.<wbr>c<br>
index 832e8ee26f9..c0207d8fa0c 100644<br>
--- a/src/intel/blorp/blorp_clear.<wbr>c<br>
+++ b/src/intel/blorp/blorp_clear.<wbr>c<br>
@@ -880,7 +880,7 @@ blorp_params_get_mcs_partial_<wbr>resolve_kernel(struct blorp_context *blorp,<br>
/* Do an MCS fetch and check if it is equal to the magic clear value */<br>
nir_ssa_def *mcs =<br>
blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, blorp_nir_frag_coord(&b)),<br>
- nir_load_layer_id(&b));<br>
+ nir_load_layer_id(&b, 32));<br>
nir_ssa_def *is_clear =<br>
blorp_nir_mcs_is_clear_color(&<wbr>b, mcs, blorp_key.num_samples);<br>
<br>
diff --git a/src/intel/compiler/brw_nir_<wbr>lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_<wbr>lower_cs_intrinsics.c<br>
index bfbdea0e8fa..846e82ffdf9 100644<br>
--- a/src/intel/compiler/brw_nir_<wbr>lower_cs_intrinsics.c<br>
+++ b/src/intel/compiler/brw_nir_<wbr>lower_cs_intrinsics.c<br>
@@ -61,11 +61,11 @@ lower_cs_intrinsics_convert_<wbr>block(struct lower_intrinsics_state *state,<br>
if (state->local_workgroup_size <= state->dispatch_width)<br>
subgroup_id = nir_imm_int(b, 0);<br>
else<br>
- subgroup_id = nir_load_subgroup_id(b);<br>
+ subgroup_id = nir_load_subgroup_id(b, 32);<br>
<br>
nir_ssa_def *thread_local_id =<br>
nir_imul(b, subgroup_id, nir_imm_int(b, state->dispatch_width));<br>
- nir_ssa_def *channel = nir_load_subgroup_invocation(<wbr>b);<br>
+ nir_ssa_def *channel = nir_load_subgroup_invocation(<wbr>b, 32);<br>
sysval = nir_iadd(b, channel, thread_local_id);<br>
break;<br>
}<br>
@@ -86,7 +86,7 @@ lower_cs_intrinsics_convert_<wbr>block(struct lower_intrinsics_state *state,<br>
*/<br>
unsigned *size = nir->info.cs.local_size;<br>
<br>
- nir_ssa_def *local_index = nir_load_local_invocation_<wbr>index(b);<br>
+ nir_ssa_def *local_index = nir_load_local_invocation_<wbr>index(b, 32);<br>
<br>
nir_const_value uvec3;<br>
memset(&uvec3, 0, sizeof(uvec3));<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_tcs.c b/src/mesa/drivers/dri/i965/<wbr>brw_tcs.c<br>
index 931ef64166c..dda6431108d 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_tcs.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_tcs.c<br>
@@ -48,7 +48,7 @@ create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,<br>
nir_intrinsic_instr *store;<br>
nir_ssa_def *zero = nir_imm_int(&b, 0);<br>
nir_ssa_def *invoc_id =<br>
- nir_load_system_value(&b, nir_intrinsic_load_invocation_<wbr>id, 0);<br>
+ nir_load_system_value(&b, nir_intrinsic_load_invocation_<wbr>id, 0, 32);<br>
<br>
nir->info.inputs_read = key->outputs_written &<br>
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.14.3<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>