[Mesa-dev] [PATCH 05/12] i965/cs: Split out helper for building local id payload
Pohjolainen, Topi
topi.pohjolainen at intel.com
Thu Oct 8 01:34:47 PDT 2015
On Wed, Oct 07, 2015 at 07:11:45AM -0700, Kristian H?gsberg Kristensen wrote:
> The initial motivation for this patch was to avoid calling
> brw_cs_prog_local_id_payload_dwords() in gen7_cs_state.c from the
> compiler. This commit ends up refactoring things a bit more so as to
> split out the logic to build the local id payload to brw_fs.cpp. This
> moves the payload building closer to the compiler code that uses the
> payload layout and makes it avaiable to other users of the compiler.
^
available
>
> Signed-off-by: Kristian Høgsberg Kristensen <krh at bitplanet.net>
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 1 +
> src/mesa/drivers/dri/i965/brw_cs.h | 5 +-
> src/mesa/drivers/dri/i965/brw_fs.cpp | 68 ++++++++++++++++++++++++--
> src/mesa/drivers/dri/i965/gen7_cs_state.c | 80 ++++---------------------------
> 4 files changed, 76 insertions(+), 78 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 0a29a69..1869f28 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -484,6 +484,7 @@ struct brw_cs_prog_data {
> unsigned simd_size;
> bool uses_barrier;
> bool uses_num_work_groups;
> + unsigned local_invocation_id_regs;
>
> struct {
> /** @{
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
> index 0c0ed2b..c07eb6c 100644
> --- a/src/mesa/drivers/dri/i965/brw_cs.h
> +++ b/src/mesa/drivers/dri/i965/brw_cs.h
> @@ -48,8 +48,9 @@ brw_cs_emit(struct brw_context *brw,
> struct gl_shader_program *prog,
> unsigned *final_assembly_size);
>
> -unsigned
> -brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width);
> +void
> +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
> + void *buffer, uint32_t threads, uint32_t stride);
>
> #ifdef __cplusplus
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 1dee888..b4125aa 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -4718,20 +4718,43 @@ fs_visitor::setup_vs_payload()
> payload.num_regs = 2;
> }
>
> +/**
> + * We are building the local ID push constant data using the simplest possible
> + * method. We simply push the local IDs directly as they should appear in the
> + * registers for the uvec3 gl_LocalInvocationID variable.
> + *
> + * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
> + * registers worth of push constant space.
> + *
> + * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
> + * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
> + * to coordinated.
> + *
> + * FINISHME: There are a few easy optimizations to consider.
> + *
> + * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
> + * no need for using push constant space for that dimension.
> + *
> + * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
> + * easily use 16-bit words rather than 32-bit dwords in the push constant
> + * data.
> + *
> + * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
> + * conveying the data, and thereby reduce push constant usage.
> + *
> + */
> void
> fs_visitor::setup_cs_payload()
> {
> assert(devinfo->gen >= 7);
> + brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
>
> payload.num_regs = 1;
>
> if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
> - const unsigned local_id_dwords =
> - brw_cs_prog_local_id_payload_dwords(dispatch_width);
> - assert((local_id_dwords & 0x7) == 0);
> - const unsigned local_id_regs = local_id_dwords / 8;
> + prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
Subtle change in functionality here. Before this was fixed to 3 without
taking the dispatch width into account. Might be worth to note in the commit
message.
> payload.local_invocation_id_reg = payload.num_regs;
> - payload.num_regs += local_id_regs;
> + payload.num_regs += prog_data->local_invocation_id_regs;
> }
> }
>
> @@ -5171,6 +5194,41 @@ brw_wm_fs_emit(struct brw_context *brw,
> return g.get_assembly(final_assembly_size);
> }
>
> +void
> +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
> + void *buffer, uint32_t threads, uint32_t stride)
> +{
> + if (prog_data->local_invocation_id_regs == 0)
> + return;
> +
> + /* stride should be an integer number of registers, that is, a multiple of
> + * 32 bytes. */
> + assert(stride % 32 == 0);
> +
> + unsigned x = 0, y = 0, z = 0;
> + for (unsigned t = 0; t < threads; t++) {
> + uint32_t *param = (uint32_t *) buffer + stride * t / 4;
> +
> + for (unsigned i = 0; i < prog_data->simd_size; i++) {
> + param[0 * prog_data->simd_size + i] = x;
> + param[1 * prog_data->simd_size + i] = y;
> + param[2 * prog_data->simd_size + i] = z;
> +
> + x++;
> + if (x == prog_data->local_size[0]) {
> + x = 0;
> + y++;
> + if (y == prog_data->local_size[1]) {
> + y = 0;
> + z++;
> + if (z == prog_data->local_size[2])
> + z = 0;
> + }
> + }
> + }
> + }
> +}
> +
> fs_reg *
> fs_visitor::emit_cs_local_invocation_id_setup()
> {
> diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
> index 5edc4fc..6aeb0cb 100644
> --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
> @@ -70,10 +70,8 @@ brw_upload_cs_state(struct brw_context *brw)
>
> unsigned local_id_dwords = 0;
>
> - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
> - local_id_dwords =
> - brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
> - }
> + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
> + local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
>
> unsigned push_constant_data_size =
> (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
> @@ -191,63 +189,6 @@ const struct brw_tracked_state brw_cs_state = {
>
>
> /**
> - * We are building the local ID push constant data using the simplest possible
> - * method. We simply push the local IDs directly as they should appear in the
> - * registers for the uvec3 gl_LocalInvocationID variable.
> - *
> - * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
> - * registers worth of push constant space.
> - *
> - * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
> - * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
> - * to coordinated.
> - *
> - * FINISHME: There are a few easy optimizations to consider.
> - *
> - * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
> - * no need for using push constant space for that dimension.
> - *
> - * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
> - * easily use 16-bit words rather than 32-bit dwords in the push constant
> - * data.
> - *
> - * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
> - * conveying the data, and thereby reduce push constant usage.
> - *
> - */
> -unsigned
> -brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width)
> -{
> - return 3 * dispatch_width;
> -}
> -
> -
> -static void
> -fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
> - void *buffer, unsigned *x, unsigned *y, unsigned *z)
> -{
> - uint32_t *param = (uint32_t *)buffer;
> - for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
> - param[0 * cs_prog_data->simd_size + i] = *x;
> - param[1 * cs_prog_data->simd_size + i] = *y;
> - param[2 * cs_prog_data->simd_size + i] = *z;
> -
> - (*x)++;
> - if (*x == cs_prog_data->local_size[0]) {
> - *x = 0;
> - (*y)++;
> - if (*y == cs_prog_data->local_size[1]) {
> - *y = 0;
> - (*z)++;
> - if (*z == cs_prog_data->local_size[2])
> - *z = 0;
> - }
> - }
> - }
> -}
> -
> -
> -/**
> * Creates a region containing the push constants for the CS on gen7+.
> *
> * Push constants are constant values (such as GLSL uniforms) that are
> @@ -269,10 +210,8 @@ brw_upload_cs_push_constants(struct brw_context *brw,
> (struct brw_stage_prog_data*) cs_prog_data;
> unsigned local_id_dwords = 0;
>
> - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
> - local_id_dwords =
> - brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
> - }
> + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
> + local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
>
> /* Updates the ParamaterValues[i] pointers for all parameters of the
> * basic type of PROGRAM_STATE_VAR.
> @@ -302,14 +241,13 @@ brw_upload_cs_push_constants(struct brw_context *brw,
>
> STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
>
> + brw_cs_fill_local_id_payload(cs_prog_data, param, threads,
> + reg_aligned_constant_size);
> +
> /* _NEW_PROGRAM_CONSTANTS */
> - unsigned x = 0, y = 0, z = 0;
> for (t = 0; t < threads; t++) {
> - gl_constant_value *next_param = ¶m[t * param_aligned_count];
> - if (local_id_dwords > 0) {
> - fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
> - next_param += local_id_dwords;
> - }
> + gl_constant_value *next_param =
> + ¶m[t * param_aligned_count + local_id_dwords];
> for (i = 0; i < prog_data->nr_params; i++) {
> next_param[i] = *prog_data->param[i];
> }
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list