[Mesa-dev] [PATCH 3/7] ac: lower 64bit subgroup intrinsics

Michael Schellenberger Costa mschellenbergercosta at googlemail.com
Thu Mar 8 18:53:43 UTC 2018


Hi Daniel,


Am 08.03.2018 um 18:10 schrieb Daniel Schürmann:
> ---
>   src/amd/common/ac_lower_subgroups.c | 50 ++++++++++++++++++++++++++++++++++---
>   1 file changed, 46 insertions(+), 4 deletions(-)
>
> diff --git a/src/amd/common/ac_lower_subgroups.c b/src/amd/common/ac_lower_subgroups.c
> index d0782b481b..2be48e2ba1 100644
> --- a/src/amd/common/ac_lower_subgroups.c
> +++ b/src/amd/common/ac_lower_subgroups.c
> @@ -26,9 +26,45 @@
>   
>   #include "ac_nir_to_llvm.h"
>   
> +static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, nir_intrinsic_instr *intrin) {
> +	assert(intrin->src[0].ssa->bit_size == 64);
> +	nir_ssa_def * x = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
> +	nir_ssa_def * y = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
The extra space before x/y looks wrong.
> +	nir_intrinsic_instr *intr_x = nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
> +	nir_intrinsic_instr *intr_y = nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
> +	nir_ssa_dest_init(&intr_x->instr, &intr_x->dest, 1, 32, NULL);
> +	nir_ssa_dest_init(&intr_y->instr, &intr_y->dest, 1, 32, NULL);
> +	intr_x->src[0] = nir_src_for_ssa(x);
> +	intr_y->src[0] = nir_src_for_ssa(y);
> +	intr_x->const_index[0] = intr_y->const_index[0] = intrin->const_index[0];
> +	intr_x->const_index[1] = intr_y->const_index[1] = intrin->const_index[1];
> +	if (intrin->intrinsic == nir_intrinsic_read_invocation ||
> +		intrin->intrinsic == nir_intrinsic_shuffle ||
> +		intrin->intrinsic == nir_intrinsic_quad_broadcast) {
Indentation is off for the other conditions.
> +		nir_src_copy(&intr_x->src[1], &intrin->src[1], intr_x);
> +		nir_src_copy(&intr_y->src[1], &intrin->src[1], intr_y);
> +	}
> +	intr_x->num_components = 1;
> +	intr_y->num_components = 1;
> +	nir_builder_instr_insert(b, &intr_x->instr);
> +	nir_builder_instr_insert(b, &intr_y->instr);
> +	return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
> +}

That said, could you make a helper function:

/*
 * Build a 32-bit clone of the subgroup intrinsic `intrin` that operates on
 * one half of its 64-bit first source.
 *
 * b:         builder used to emit the unpack of the selected half.
 * intrin:    the original intrinsic; its opcode, const_index[0..1] and
 *            (for some opcodes) src[1] are carried over to the clone.
 * component: 0 selects the "x" half (nir_unpack_64_2x32_split_x); any other
 *            value selects the "y" half (nir_unpack_64_2x32_split_y).
 *
 * Returns the newly created intrinsic with a 1-component, 32-bit destination.
 * The clone is NOT inserted here; the caller has to insert it.
 */
static nir_intrinsic_instr *ac_lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, unsigned int component) {
	nir_ssa_def *half = (component == 0)
		? nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa)
		: nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

	nir_intrinsic_instr *half_intr =
		nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
	nir_ssa_dest_init(&half_intr->instr, &half_intr->dest, 1, 32, NULL);
	half_intr->src[0] = nir_src_for_ssa(half);

	/* Carry over the first two constant indices unchanged. */
	for (unsigned i = 0; i < 2; i++)
		half_intr->const_index[i] = intrin->const_index[i];

	/* These opcodes also get the original's second source copied over
	 * (presumably the invocation/lane selector -- confirm against the
	 * NIR intrinsic definitions). */
	switch (intrin->intrinsic) {
	case nir_intrinsic_read_invocation:
	case nir_intrinsic_shuffle:
	case nir_intrinsic_quad_broadcast:
		nir_src_copy(&half_intr->src[1], &intrin->src[1], half_intr);
		break;
	default:
		break;
	}

	half_intr->num_components = 1;
	return half_intr;
}

And then simplify into:

/*
 * Lower a subgroup intrinsic whose first source is 64 bits wide into two
 * 32-bit intrinsics of the same opcode, one per half of the source, and
 * return the two 32-bit results packed back into a single 64-bit value.
 *
 * The source bit size is only checked with assert().
 */
static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, nir_intrinsic_instr *intrin) {
	nir_intrinsic_instr *halves[2];

	assert(intrin->src[0].ssa->bit_size == 64);

	/* Build both per-half intrinsics first ... */
	for (unsigned i = 0; i < 2; i++)
		halves[i] = ac_lower_subgroups_64bit_split_intrinsic(b, intrin, i);

	/* ... then insert them, x half before y half. */
	for (unsigned i = 0; i < 2; i++)
		nir_builder_instr_insert(b, &halves[i]->instr);

	return nir_pack_64_2x32_split(b, &halves[0]->dest.ssa, &halves[1]->dest.ssa);
}

--Michael

> +
>   static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin)
>   {
>   	switch(intrin->intrinsic) {
> +	case nir_intrinsic_read_invocation:
> +	case nir_intrinsic_read_first_invocation:
> +	case nir_intrinsic_shuffle:
> +	case nir_intrinsic_quad_broadcast:
> +	case nir_intrinsic_quad_swap_horizontal:
> +	case nir_intrinsic_quad_swap_vertical:
> +	case nir_intrinsic_quad_swap_diagonal:
> +		if (intrin->src[0].ssa->bit_size == 64)
> +			return ac_lower_subgroups_64bit(b, intrin);
> +		else
> +			return NULL;
>   	case nir_intrinsic_vote_ieq:
>   	case nir_intrinsic_vote_feq: {
>   		nir_intrinsic_instr *rfi =
> @@ -37,12 +73,18 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_inst
>   		                  1, intrin->src[0].ssa->bit_size, NULL);
>   		nir_src_copy(&rfi->src[0], &intrin->src[0], rfi);
>   		rfi->num_components = 1;
> -
> +		nir_ssa_def *first_lane;
> +		if (intrin->src[0].ssa->bit_size == 64) {
> +			first_lane = ac_lower_subgroups_64bit(b, rfi);
> +		} else {
> +			nir_builder_instr_insert(b, &rfi->instr);
> +			first_lane = &rfi->dest.ssa;
> +		}
>   		nir_ssa_def *is_ne;
>   		if (intrin->intrinsic == nir_intrinsic_vote_feq)
> -			is_ne = nir_fne(b, &rfi->dest.ssa, intrin->src[0].ssa);
> +			is_ne = nir_fne(b, first_lane, intrin->src[0].ssa);
>   		else
> -			is_ne = nir_ine(b, &rfi->dest.ssa, intrin->src[0].ssa);
> +			is_ne = nir_ine(b, first_lane, intrin->src[0].ssa);
>   
>   		nir_intrinsic_instr *ballot =
>   			nir_intrinsic_instr_create(b->shader, nir_intrinsic_ballot);
> @@ -50,7 +92,7 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_inst
>   		                  1, 64, NULL);
>   		ballot->src[0] = nir_src_for_ssa(is_ne);
>   		ballot->num_components = 1;
> -
> +		nir_builder_instr_insert(b, &ballot->instr);
>   		return nir_ieq(b, &ballot->dest.ssa, nir_imm_int64(b, 0));
>   	}
>   	default:



More information about the mesa-dev mailing list