[Mesa-dev] [PATCH] nir: create 32-bit bcsel for 32-bit conditions
Alan Swanson
reiver at improbability.net
Wed Dec 19 18:27:27 UTC 2018
On Mon, 2018-12-17 at 16:16 +0000, Rhys Perry wrote:
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
> src/compiler/nir/nir_opt_peephole_select.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_opt_peephole_select.c
> b/src/compiler/nir/nir_opt_peephole_select.c
> index ad9d0abec0..241627ed99 100644
> --- a/src/compiler/nir/nir_opt_peephole_select.c
> +++ b/src/compiler/nir/nir_opt_peephole_select.c
> @@ -205,7 +205,9 @@ nir_opt_peephole_select_block(nir_block *block,
> nir_shader *shader,
> break;
>
> nir_phi_instr *phi = nir_instr_as_phi(instr);
> - nir_alu_instr *sel = nir_alu_instr_create(shader,
> nir_op_bcsel);
> + nir_op sel_op = nir_src_bit_size(if_stmt->condition) == 1 ?
> + nir_op_bcsel : nir_op_b32csel;
> + nir_alu_instr *sel = nir_alu_instr_create(shader, sel_op);
> nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
> /* Splat the condition to all channels */
> memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
Just to note that I currently need this patch on radv with git head to
run Total War: Warhammer 2, and probably other Vulkan-based games;
otherwise I get the following validation error (though it was suggested
in follow-on emails that the validator wouldn't complain):
NIR validation failed after nir_opt_peephole_select
3 errors:
shader: MESA_SHADER_COMPUTE
name: tfb_query
local-size: 64, 1, 1
shared-size: 0
inputs: 0
outputs: 0
uniforms: 0
shared: 0
decl_function main (0 params)
impl main {
block block_0:
/* preds: */
vec1 64 ssa_0 = load_const (0x 0 /* 0.000000 */)
vec1 32 ssa_4 = load_const (0x00000000 /* 0.000000 */)
/* flags */ vec1 32 ssa_7 = intrinsic load_push_constant
(ssa_4) (0, 16) /* base=0 */ /* range=16 */
vec1 32 ssa_9 = intrinsic vulkan_resource_index (ssa_4) (0, 0)
/* desc-set=0 */ /* binding=0 */
vec1 32 ssa_11 = intrinsic vulkan_resource_index (ssa_4) (0, 1)
/* desc-set=0 */ /* binding=1 */
vec3 32 ssa_12 = intrinsic load_local_invocation_id () ()
vec3 32 ssa_13 = intrinsic load_work_group_id () ()
vec1 32 ssa_14 = load_const (0x00000040 /* 0.000000 */)
vec1 32 ssa_91 = imul ssa_13.x, ssa_14
vec1 32 ssa_96 = iadd ssa_91, ssa_12.x
vec1 32 ssa_22 = load_const (0x00000020 /* 0.000000 */)
vec1 32 ssa_23 = imul ssa_22, ssa_96
vec1 32 ssa_24 = load_const (0x00000004 /* 0.000000 */)
/* output_stride */ vec1 32 ssa_25 = intrinsic
load_push_constant (ssa_24) (0, 16) /* base=0 */ /* range=16 */
vec1 32 ssa_26 = imul /* output_stride */ ssa_25, ssa_96
vec4 32 ssa_27 = intrinsic load_ssbo (ssa_11, ssa_23) (0, 0, 0)
/* access=0 */ /* align_mul=0 */ /* align_offset=0 */
vec1 32 ssa_28 = load_const (0x00000010 /* 0.000000 */)
vec1 32 ssa_29 = iadd ssa_23, ssa_28
vec4 32 ssa_30 = intrinsic load_ssbo (ssa_11, ssa_29) (0, 0, 0)
/* access=0 */ /* align_mul=0 */ /* align_offset=0 */
vec1 32 ssa_33 = iand ssa_27.y, ssa_27.w
vec1 32 ssa_36 = iand ssa_30.y, ssa_30.w
vec1 32 ssa_37 = load_const (0x80000000 /* -0.000000 */)
vec1 32 ssa_38 = iand ssa_33, ssa_36
vec1 32 ssa_39 = iand ssa_38, ssa_37
vec1 64 ssa_45 = pack_64_2x32_split ssa_27.x, ssa_27.y
vec1 64 ssa_51 = pack_64_2x32_split ssa_27.z, ssa_27.w
vec1 64 ssa_57 = pack_64_2x32_split ssa_30.x, ssa_30.y
vec1 64 ssa_63 = pack_64_2x32_split ssa_30.z, ssa_30.w
vec1 64 ssa_64 = isub ssa_63, ssa_51
vec1 64 ssa_65 = isub ssa_57, ssa_45
vec1 32 ssa_68 = load_const (0x00000001 /* 0.000000 */)
vec1 32 ssa_120 = bcsel ssa_39, ssa_68, ssa_4
error: src_bit_size == nir_alu_type_get_type_size(src_type) (../mesa-
9999/src/compiler/nir/nir_validate.c:360)
vec1 64 ssa_121 = bcsel ssa_39, ssa_64, ssa_0
error: src_bit_size == nir_alu_type_get_type_size(src_type) (../mesa-
9999/src/compiler/nir/nir_validate.c:360)
vec1 64 ssa_122 = bcsel ssa_39, ssa_65, ssa_0
error: src_bit_size == nir_alu_type_get_type_size(src_type) (../mesa-
9999/src/compiler/nir/nir_validate.c:360)
vec1 32 ssa_72 = load_const (0x00000001 /* 0.000000 */)
vec1 32 ssa_73 = iand /* flags */ ssa_7, ssa_72
vec1 32 ssa_74 = load_const (0x00000008 /* 0.000000 */)
vec1 32 ssa_76 = b32csel ssa_73, ssa_28, ssa_74
vec1 32 ssa_79 = iand /* flags */ ssa_7, ssa_74
vec1 32 ssa_80 = ior ssa_79, ssa_120
/* succs: block_1 block_5 */
if ssa_80 {
block block_1:
/* preds: block_0 */
/* succs: block_2 block_3 */
if ssa_73 {
block block_2:
/* preds: block_1 */
vec2 64 ssa_106 = vec2 ssa_121, ssa_122
intrinsic store_ssbo (ssa_106, ssa_9, ssa_26)
(3, 0, 0, 0) /* wrmask=xy */ /* access=0 */ /* align_mul=0 */ /*
align_offset=0 */
/* succs: block_4 */
} else {
block block_3:
/* preds: block_1 */
vec1 32 ssa_110 = u2u32 ssa_121
vec1 32 ssa_111 = u2u32 ssa_122
vec2 32 ssa_112 = vec2 ssa_110, ssa_111
intrinsic store_ssbo (ssa_112, ssa_9, ssa_26)
(3, 0, 0, 0) /* wrmask=xy */ /* access=0 */ /* align_mul=0 */ /*
align_offset=0 */
/* succs: block_4 */
}
block block_4:
/* preds: block_2 block_3 */
/* succs: block_6 */
} else {
block block_5:
/* preds: block_0 */
/* succs: block_6 */
}
block block_6:
/* preds: block_4 block_5 */
vec1 32 ssa_85 = iand /* flags */ ssa_7, ssa_24
/* succs: block_7 block_8 */
if ssa_85 {
block block_7:
/* preds: block_6 */
vec1 32 ssa_87 = iadd ssa_76, ssa_26
intrinsic store_ssbo (ssa_120, ssa_9, ssa_87) (1, 0, 0,
0) /* wrmask=x */ /* access=0 */ /* align_mul=0 */ /* align_offset=0 */
/* succs: block_9 */
} else {
block block_8:
/* preds: block_6 */
/* succs: block_9 */
}
block block_9:
/* preds: block_7 block_8 */
/* succs: block_10 */
block block_10:
}
--
Cheers, Alan.
More information about the mesa-dev
mailing list