[Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

Mon Jul 10 20:10:26 UTC 2017

On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner <mattst88 at gmail.com> wrote:
> We already had a channel_num system value, which I'm renaming to
> subgroup_invocation to match the rest of the new system values.
>
> Note that while ballotARB(true) will return zeros in the high 32-bits on
> systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
> variables do not consider whether channels are enabled. See issue (1) of
> ARB_shader_ballot.
> ---
>  src/compiler/nir/nir.c                     |  4 ++++
>  src/compiler/nir/nir_intrinsics.h          |  8 +++++++-
>  src/compiler/nir/nir_lower_system_values.c | 28 ++++++++++++++++++++++++++++
>  src/intel/compiler/brw_fs_nir.cpp          |  2 +-
>  src/intel/compiler/brw_nir_intrinsics.c    |  4 ++--
>  5 files changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
> index 491b908396..9827e129ca 100644
> --- a/src/compiler/nir/nir.c
> +++ b/src/compiler/nir/nir.c
> @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
>        return nir_intrinsic_load_helper_invocation;
>     case SYSTEM_VALUE_VIEW_INDEX:
>        return nir_intrinsic_load_view_index;
> +   case SYSTEM_VALUE_SUBGROUP_SIZE:
> +      return nir_intrinsic_load_subgroup_size;
> +   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
> +      return nir_intrinsic_load_subgroup_invocation;
>     default:
>        unreachable("system value does not directly correspond to intrinsic");
>     }
> diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h
> index 6c6ba4cf59..96ecfbc338 100644
> --- a/src/compiler/nir/nir_intrinsics.h
> +++ b/src/compiler/nir/nir_intrinsics.h
> @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
>  SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
>  SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
>  SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
> -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
>
>  /* Blend constant color values.  Float values are clamped. */
>  SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
> diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
> index 810100a081..faf0c3c9da 100644
> --- a/src/compiler/nir/nir_lower_system_values.c
> +++ b/src/compiler/nir/nir_lower_system_values.c
> @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
>                             nir_load_base_instance(b));
>           break;
>
> +      case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
> +      case SYSTEM_VALUE_SUBGROUP_GE_MASK:
> +      case SYSTEM_VALUE_SUBGROUP_GT_MASK:
> +      case SYSTEM_VALUE_SUBGROUP_LE_MASK:
> +      case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
> +         nir_ssa_def *count = nir_load_subgroup_invocation(b);
> +
> +         switch (var->data.location) {
> +         case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
> +            sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
> +            break;
> +         case SYSTEM_VALUE_SUBGROUP_GE_MASK:
> +            sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
> +            break;
> +         case SYSTEM_VALUE_SUBGROUP_GT_MASK:
> +            sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
> +            break;
> +         case SYSTEM_VALUE_SUBGROUP_LE_MASK:
> +            sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count));
> +            break;
> +         case SYSTEM_VALUE_SUBGROUP_LT_MASK:
> +            sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count));
> +            break;
> +         default:
> +            unreachable("you seriously can't tell this is unreachable?");
> +         }
> +      }
> +

While this fine to do for both Intel and AMD, Nvidia actually has
special system values for these, and AMD has special instructions for
bitCount(foo & gl_SubGroupLtMask), so I think we should have actual
nir_load_subgroup_*_mask intrinsics for these. Also, that way you can
use the same shrinking logic to turn these into 32-bit shifts on
Intel.

>        default:
>           break;
>        }
> diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
> index 264398f38e..17f35e081d 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -4075,7 +4075,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
>        break;
>     }
>
> -   case nir_intrinsic_load_channel_num: {
> +   case nir_intrinsic_load_subgroup_invocation: {
>        fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
>        dest = retype(dest, BRW_REGISTER_TYPE_UD);
>        const fs_builder allbld8 = bld.group(8, 0).exec_all();
> diff --git a/src/intel/compiler/brw_nir_intrinsics.c b/src/intel/compiler/brw_nir_intrinsics.c
> index d63570fa2a..abbbc6f93e 100644
> --- a/src/intel/compiler/brw_nir_intrinsics.c
> +++ b/src/intel/compiler/brw_nir_intrinsics.c
> @@ -88,10 +88,10 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
>           /* We construct the local invocation index from:
>            *
>            *    gl_LocalInvocationIndex =
> -          *       cs_thread_local_id + channel_num;
> +          *       cs_thread_local_id + subgroup_invocation;
>            */
>           nir_ssa_def *thread_local_id = read_thread_local_id(state);
> -         nir_ssa_def *channel = nir_load_channel_num(b);
> +         nir_ssa_def *channel = nir_load_subgroup_invocation(b);
>           sysval = nir_iadd(b, channel, thread_local_id);
>           break;
>        }
> --
> 2.13.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev