[Mesa-dev] [PATCH v2 2/2] i965: Implement ARB_compute_variable_group_size.

Manolova, Plamena plamena.manolova at intel.com
Thu Jun 28 16:52:35 UTC 2018


Hi Karol,
Thank you for reviewing! I'll go ahead and push the changes you need from
nir_lower_system_values.c to master.

Thank you,
Pam

On Thu, Jun 28, 2018 at 5:50 AM, Karol Herbst <kherbst at redhat.com> wrote:

> Hi,
>
> if the changes inside "src/compiler/nir/nir_lower_system_values.c" are
> extracted into a seperate patch, this patch with the equal changes
> would be
>
> Reviewed-by: Karol Herbst <kherbst at redhat.com>
>
> I would need that for a nir to codegen pass for Nouveau and maybe it
> will help other drivers implementing this extension as well. I don't
> think it would hurt to extract those, right?
>
> Thanks!
>
> On Thu, Jun 7, 2018 at 5:34 PM, Plamena Manolova
> <plamena.n.manolova at gmail.com> wrote:
> > This patch adds the implementation of ARB_compute_variable_group_size
> > for i965. We do this by storing the group size in a buffer surface,
> > similarly to the work group number.
> >
> > v2: Fix some indentation inconsistencies (Jordan, Ilia)
> >     Do DIV_ROUND_UP correctly in brw_nir_lower_cs_intrinsics.c (Jordan)
> >     Use alphabetical order in features.txt (Matt)
> >     Set the extension constants properly in brw_context.c
> >
> > Signed-off-by: Plamena Manolova <plamena.n.manolova at gmail.com>
> > ---
> >  docs/features.txt                                |  2 +-
> >  docs/relnotes/18.2.0.html                        |  1 +
> >  src/compiler/nir/nir_lower_system_values.c       | 13 ++++
> >  src/intel/compiler/brw_compiler.h                |  2 +
> >  src/intel/compiler/brw_fs.cpp                    | 45 ++++++++----
> >  src/intel/compiler/brw_fs_nir.cpp                | 20 ++++++
> >  src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 88
> +++++++++++++++++-------
> >  src/mesa/drivers/dri/i965/brw_compute.c          | 25 ++++++-
> >  src/mesa/drivers/dri/i965/brw_context.c          |  6 ++
> >  src/mesa/drivers/dri/i965/brw_context.h          |  1 +
> >  src/mesa/drivers/dri/i965/brw_cs.c               |  4 ++
> >  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 27 +++++++-
> >  src/mesa/drivers/dri/i965/intel_extensions.c     |  1 +
> >  13 files changed, 193 insertions(+), 42 deletions(-)
> >
> > diff --git a/docs/features.txt b/docs/features.txt
> > index ed4050cf98..81b6663288 100644
> > --- a/docs/features.txt
> > +++ b/docs/features.txt
> > @@ -298,7 +298,7 @@ Khronos, ARB, and OES extensions that are not part
> of any OpenGL or OpenGL ES ve
> >
> >    GL_ARB_bindless_texture                               DONE (nvc0,
> radeonsi)
> >    GL_ARB_cl_event                                       not started
> > -  GL_ARB_compute_variable_group_size                    DONE (nvc0,
> radeonsi)
> > +  GL_ARB_compute_variable_group_size                    DONE (i965,
> nvc0, radeonsi)
> >    GL_ARB_ES3_2_compatibility                            DONE
> (i965/gen8+)
> >    GL_ARB_fragment_shader_interlock                      DONE (i965)
> >    GL_ARB_gpu_shader_int64                               DONE
> (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe)
> > diff --git a/docs/relnotes/18.2.0.html b/docs/relnotes/18.2.0.html
> > index 0db37b620d..7475a56633 100644
> > --- a/docs/relnotes/18.2.0.html
> > +++ b/docs/relnotes/18.2.0.html
> > @@ -52,6 +52,7 @@ Note: some of the new features are only available with
> certain drivers.
> >
> >  <ul>
> >  <li>GL_ARB_fragment_shader_interlock on i965</li>
> > +<li>GL_ARB_compute_variable_group_size on i965</li>
> >  </ul>
> >
> >  <h2>Bug fixes</h2>
> > diff --git a/src/compiler/nir/nir_lower_system_values.c
> b/src/compiler/nir/nir_lower_system_values.c
> > index 487da04262..7ab005b000 100644
> > --- a/src/compiler/nir/nir_lower_system_values.c
> > +++ b/src/compiler/nir/nir_lower_system_values.c
> > @@ -57,6 +57,14 @@ convert_block(nir_block *block, nir_builder *b)
> >            *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
> >            */
> >
> > +         /*
> > +          * If the local work group size is variable we can't lower the
> global
> > +          * invocation id here.
> > +          */
> > +         if (b->shader->info.cs.local_size_variable) {
> > +            break;
> > +         }
> > +
> >           nir_const_value local_size;
> >           memset(&local_size, 0, sizeof(local_size));
> >           local_size.u32[0] = b->shader->info.cs.local_size[0];
> > @@ -102,6 +110,11 @@ convert_block(nir_block *block, nir_builder *b)
> >        }
> >
> >        case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
> > +         /* If the local work group size is variable we can't lower it
> here */
> > +         if (b->shader->info.cs.local_size_variable) {
> > +            break;
> > +         }
> > +
> >           nir_const_value local_size;
> >           memset(&local_size, 0, sizeof(local_size));
> >           local_size.u32[0] = b->shader->info.cs.local_size[0];
> > diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_
> compiler.h
> > index 8b4e6fe2e2..f54952c28f 100644
> > --- a/src/intel/compiler/brw_compiler.h
> > +++ b/src/intel/compiler/brw_compiler.h
> > @@ -759,6 +759,7 @@ struct brw_cs_prog_data {
> >     unsigned threads;
> >     bool uses_barrier;
> >     bool uses_num_work_groups;
> > +   bool uses_variable_group_size;
> >
> >     struct {
> >        struct brw_push_const_block cross_thread;
> > @@ -771,6 +772,7 @@ struct brw_cs_prog_data {
> >         * surface indices the CS-specific surfaces
> >         */
> >        uint32_t work_groups_start;
> > +      uint32_t work_group_size_start;
> >        /** @} */
> >     } binding_table;
> >  };
> > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.
> cpp
> > index d67c0a4192..28730af47b 100644
> > --- a/src/intel/compiler/brw_fs.cpp
> > +++ b/src/intel/compiler/brw_fs.cpp
> > @@ -7228,18 +7228,32 @@ brw_compile_cs(const struct brw_compiler
> *compiler, void *log_data,
> >                 int shader_time_index,
> >                 char **error_str)
> >  {
> > -   prog_data->local_size[0] = src_shader->info.cs.local_size[0];
> > -   prog_data->local_size[1] = src_shader->info.cs.local_size[1];
> > -   prog_data->local_size[2] = src_shader->info.cs.local_size[2];
> > -   unsigned local_workgroup_size =
> > -      src_shader->info.cs.local_size[0] * src_shader->info.cs.local_size[1]
> *
> > -      src_shader->info.cs.local_size[2];
> > -
> > -   unsigned min_dispatch_width =
> > -      DIV_ROUND_UP(local_workgroup_size, compiler->devinfo->max_cs_
> threads);
> > -   min_dispatch_width = MAX2(8, min_dispatch_width);
> > -   min_dispatch_width = util_next_power_of_two(min_dispatch_width);
> > -   assert(min_dispatch_width <= 32);
> > +   unsigned min_dispatch_width;
> > +
> > +   if (!src_shader->info.cs.local_size_variable) {
> > +      unsigned local_workgroup_size =
> > +         src_shader->info.cs.local_size[0] * src_shader->info.cs.local_size[1]
> *
> > +         src_shader->info.cs.local_size[2];
> > +
> > +      min_dispatch_width =
> > +         DIV_ROUND_UP(local_workgroup_size, compiler->devinfo->max_cs_
> threads);
> > +      min_dispatch_width = MAX2(8, min_dispatch_width);
> > +      min_dispatch_width = util_next_power_of_two(min_dispatch_width);
> > +      assert(min_dispatch_width <= 32);
> > +
> > +      prog_data->local_size[0] = src_shader->info.cs.local_size[0];
> > +      prog_data->local_size[1] = src_shader->info.cs.local_size[1];
> > +      prog_data->local_size[2] = src_shader->info.cs.local_size[2];
> > +      prog_data->uses_variable_group_size = false;
> > +   } else {
> > +      /*
> > +       * If the local work group size is variable we have to use a
> dispatch
> > +       * width of 32 here, since at this point we don't know the actual
> size of
> > +       * the workload.
> > +       */
> > +      min_dispatch_width = 32;
> > +      prog_data->uses_variable_group_size = true;
> > +   }
> >
> >     fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL;
> >     cfg_t *cfg = NULL;
> > @@ -7324,7 +7338,12 @@ brw_compile_cs(const struct brw_compiler
> *compiler, void *log_data,
> >           }
> >        } else {
> >           cfg = v32->cfg;
> > -         cs_set_simd_size(prog_data, 32);
> > +         if (!src_shader->info.cs.local_size_variable) {
> > +            cs_set_simd_size(prog_data, 32);
> > +         } else {
> > +            prog_data->simd_size = 32;
> > +            prog_data->threads = compiler->devinfo->max_cs_threads;
> > +         }
> >           cs_fill_push_const_info(compiler->devinfo, prog_data);
> >           promoted_constants = v32->promoted_constants;
> >        }
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> > index 166da0aa6d..c4948c2347 100644
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > @@ -3766,6 +3766,26 @@ fs_visitor::nir_emit_cs_intrinsic(const
> fs_builder &bld,
> >        break;
> >     }
> >
> > +   case nir_intrinsic_load_local_group_size: {
> > +      const unsigned surface =
> > +         cs_prog_data->binding_table.work_group_size_start;
> > +
> > +      fs_reg surf_index = brw_imm_ud(surface);
> > +      brw_mark_surface_used(prog_data, surface);
> > +
> > +      /* Read the 3 GLuint components of gl_NumWorkGroups */
> > +      for (unsigned i = 0; i < 3; i++) {
> > +         fs_reg read_result =
> > +            emit_untyped_read(bld, surf_index,
> > +                              brw_imm_ud(i << 2),
> > +                              1 /* dims */, 1 /* size */,
> > +                              BRW_PREDICATE_NONE);
> > +         read_result.type = dest.type;
> > +         bld.MOV(dest, read_result);
> > +         dest = offset(dest, bld, 1);
> > +      }
> > +      break;
> > +   }
> >     default:
> >        nir_emit_intrinsic(bld, instr);
> >        break;
> > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > index bfbdea0e8f..096e86db19 100644
> > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > @@ -58,10 +58,12 @@ lower_cs_intrinsics_convert_block(struct
> lower_intrinsics_state *state,
> >            *       cs_thread_local_id + subgroup_invocation;
> >            */
> >           nir_ssa_def *subgroup_id;
> > -         if (state->local_workgroup_size <= state->dispatch_width)
> > +         if ((state->local_workgroup_size <= state->dispatch_width) &&
> > +             !state->nir->info.cs.local_size_variable) {
> >              subgroup_id = nir_imm_int(b, 0);
> > -         else
> > +         } else {
> >              subgroup_id = nir_load_subgroup_id(b);
> > +         }
> >
> >           nir_ssa_def *thread_local_id =
> >              nir_imul(b, subgroup_id, nir_imm_int(b,
> state->dispatch_width));
> > @@ -84,43 +86,81 @@ lower_cs_intrinsics_convert_block(struct
> lower_intrinsics_state *state,
> >            *        (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
> >            *       gl_WorkGroupSize.z;
> >            */
> > -         unsigned *size = nir->info.cs.local_size;
> > -
> >           nir_ssa_def *local_index = nir_load_local_invocation_index(b);
> > -
> > -         nir_const_value uvec3;
> > -         memset(&uvec3, 0, sizeof(uvec3));
> > -         uvec3.u32[0] = 1;
> > -         uvec3.u32[1] = size[0];
> > -         uvec3.u32[2] = size[0] * size[1];
> > -         nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
> > -         uvec3.u32[0] = size[0];
> > -         uvec3.u32[1] = size[1];
> > -         uvec3.u32[2] = size[2];
> > -         nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
> > -
> > -         sysval = nir_umod(b, nir_udiv(b, local_index, div_val),
> mod_val);
> > +         if (!state->nir->info.cs.local_size_variable) {
> > +            unsigned *size = nir->info.cs.local_size;
> > +
> > +            nir_const_value uvec3;
> > +            memset(&uvec3, 0, sizeof(uvec3));
> > +            uvec3.u32[0] = 1;
> > +            uvec3.u32[1] = size[0];
> > +            uvec3.u32[2] = size[0] * size[1];
> > +            nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
> > +            uvec3.u32[0] = size[0];
> > +            uvec3.u32[1] = size[1];
> > +            uvec3.u32[2] = size[2];
> > +            nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
> > +
> > +            sysval = nir_umod(b, nir_udiv(b, local_index, div_val),
> mod_val);
> > +         } else {
> > +            nir_ssa_def *group_size_xyz = nir_load_local_group_size(b);
> > +            nir_ssa_def *group_size_x = nir_channel(b, group_size_xyz,
> 0);
> > +            nir_ssa_def *group_size_y = nir_channel(b, group_size_xyz,
> 1);
> > +            nir_ssa_def *group_size_z = nir_channel(b, group_size_xyz,
> 2);
> > +            nir_ssa_def *result[3];
> > +            result[0] = nir_umod(b, local_index, group_size_x);
> > +            result[1] = nir_umod(b, nir_udiv(b, local_index,
> group_size_x),
> > +               group_size_y);
> > +            result[2] = nir_umod(b, nir_udiv(b, local_index,
> > +               nir_umul_high(b, group_size_x, group_size_y)),
> group_size_z);
> > +
> > +            sysval = nir_vec(b, result, 3);
> > +         }
> >           break;
> >        }
> >
> >        case nir_intrinsic_load_subgroup_id:
> > -         if (state->local_workgroup_size > 8)
> > +         if (state->local_workgroup_size > 8 ||
> > +             state->nir->info.cs.local_size_variable) {
> >              continue;
> > +        }
> >
> >           /* For small workgroup sizes, we know subgroup_id will be zero
> */
> >           sysval = nir_imm_int(b, 0);
> >           break;
> >
> >        case nir_intrinsic_load_num_subgroups: {
> > -         unsigned local_workgroup_size =
> > -            nir->info.cs.local_size[0] * nir->info.cs.local_size[1] *
> > -            nir->info.cs.local_size[2];
> > -         unsigned num_subgroups =
> > -            DIV_ROUND_UP(local_workgroup_size, state->dispatch_width);
> > -         sysval = nir_imm_int(b, num_subgroups);
> > +         if (!state->nir->info.cs.local_size_variable) {
> > +            unsigned num_subgroups;
> > +            unsigned local_workgroup_size =
> > +               nir->info.cs.local_size[0] * nir->info.cs.local_size[1] *
> > +               nir->info.cs.local_size[2];
> > +            num_subgroups =
> > +               DIV_ROUND_UP(local_workgroup_size,
> state->dispatch_width);
> > +            sysval = nir_imm_int(b, num_subgroups);
> > +         } else {
> > +            nir_ssa_def *group_size_xyz = nir_load_local_group_size(b);
> > +            nir_ssa_def *group_size_x = nir_channel(b, group_size_xyz,
> 0);
> > +            nir_ssa_def *group_size_y = nir_channel(b, group_size_xyz,
> 1);
> > +            nir_ssa_def *group_size_z = nir_channel(b, group_size_xyz,
> 2);
> > +            nir_ssa_def *group_size = nir_imul(b, group_size_x,
> nir_imul(b,
> > +               group_size_y, group_size_z));
> > +            nir_ssa_def *dispatch_width = nir_imm_int(b,
> > +               state->dispatch_width - 1);
> > +
> > +            sysval = nir_udiv(b, group_size, dispatch_width);
> > +         }
> >           break;
> >        }
> >
> > +      case nir_intrinsic_load_global_invocation_id: {
> > +         nir_ssa_def *group_id = nir_load_work_group_id(b);
> > +         nir_ssa_def *local_id = nir_load_local_invocation_id(b);
> > +         nir_ssa_def *group_size = nir_load_local_group_size(b);
> > +
> > +         sysval = nir_iadd(b, nir_imul(b, group_id, group_size),
> local_id);
> > +         break;
> > +      }
> >        default:
> >           continue;
> >        }
> > diff --git a/src/mesa/drivers/dri/i965/brw_compute.c
> b/src/mesa/drivers/dri/i965/brw_compute.c
> > index de08fc3ac1..7949e0ff51 100644
> > --- a/src/mesa/drivers/dri/i965/brw_compute.c
> > +++ b/src/mesa/drivers/dri/i965/brw_compute.c
> > @@ -121,8 +121,11 @@ brw_emit_gpgpu_walker(struct brw_context *brw)
> >     }
> >
> >     const unsigned simd_size = prog_data->simd_size;
> > -   unsigned group_size = prog_data->local_size[0] *
> > -      prog_data->local_size[1] * prog_data->local_size[2];
> > +   unsigned group_size = brw->compute.group_size != NULL ?
> > +      brw->compute.group_size[0] * brw->compute.group_size[1] *
> > +         brw->compute.group_size[2] : prog_data->local_size[0] *
> > +            prog_data->local_size[1] * prog_data->local_size[2];
> > +
> >     unsigned thread_width_max =
> >        (group_size + simd_size - 1) / simd_size;
> >
> > @@ -229,6 +232,7 @@ brw_dispatch_compute(struct gl_context *ctx, const
> GLuint *num_groups) {
> >
> >     brw->compute.num_work_groups_bo = NULL;
> >     brw->compute.num_work_groups = num_groups;
> > +   brw->compute.group_size = NULL;
> >     ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
> >
> >     brw_dispatch_compute_common(ctx);
> > @@ -248,6 +252,22 @@ brw_dispatch_compute_indirect(struct gl_context
> *ctx, GLintptr indirect)
> >     brw->compute.num_work_groups_bo = bo;
> >     brw->compute.num_work_groups_offset = indirect;
> >     brw->compute.num_work_groups = indirect_group_counts;
> > +   brw->compute.group_size = NULL;
> > +   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
> > +
> > +   brw_dispatch_compute_common(ctx);
> > +}
> > +
> > +static void
> > +brw_dispatch_compute_group_size(struct gl_context *ctx,
> > +                                const GLuint *num_groups,
> > +                                const GLuint *group_size)
> > +{
> > +   struct brw_context *brw = brw_context(ctx);
> > +
> > +   brw->compute.num_work_groups_bo = NULL;
> > +   brw->compute.num_work_groups = num_groups;
> > +   brw->compute.group_size = group_size;
> >     ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
> >
> >     brw_dispatch_compute_common(ctx);
> > @@ -258,4 +278,5 @@ brw_init_compute_functions(struct dd_function_table
> *functions)
> >  {
> >     functions->DispatchCompute = brw_dispatch_compute;
> >     functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
> > +   functions->DispatchComputeGroupSize = brw_dispatch_compute_group_
> size;
> >  }
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.c
> b/src/mesa/drivers/dri/i965/brw_context.c
> > index 9ced230ec1..25d354e155 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.c
> > +++ b/src/mesa/drivers/dri/i965/brw_context.c
> > @@ -766,6 +766,12 @@ brw_initialize_cs_context_constants(struct
> brw_context *brw)
> >     ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
> >     ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
> >     ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
> > +
> > +   /* ARB_compute_variable_group_size constants */
> > +   ctx->Const.MaxComputeVariableGroupSize[0] = max_invocations;
> > +   ctx->Const.MaxComputeVariableGroupSize[1] = max_invocations;
> > +   ctx->Const.MaxComputeVariableGroupSize[2] = max_invocations;
> > +   ctx->Const.MaxComputeVariableGroupInvocations = max_invocations;
> >  }
> >
> >  /**
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
> > index 2613b9fda2..0fb533c369 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.h
> > +++ b/src/mesa/drivers/dri/i965/brw_context.h
> > @@ -931,6 +931,7 @@ struct brw_context
> >        struct brw_bo *num_work_groups_bo;
> >        GLintptr num_work_groups_offset;
> >        const GLuint *num_work_groups;
> > +      const GLuint *group_size;
> >     } compute;
> >
> >     struct {
> > diff --git a/src/mesa/drivers/dri/i965/brw_cs.c
> b/src/mesa/drivers/dri/i965/brw_cs.c
> > index e3f8fc67a4..007273390b 100644
> > --- a/src/mesa/drivers/dri/i965/brw_cs.c
> > +++ b/src/mesa/drivers/dri/i965/brw_cs.c
> > @@ -43,6 +43,10 @@ assign_cs_binding_table_offsets(const struct
> gen_device_info *devinfo,
> >     prog_data->binding_table.work_groups_start =
> next_binding_table_offset;
> >     next_binding_table_offset++;
> >
> > +   /* May not be used if the work group size is not variable. */
> > +   prog_data->binding_table.work_group_size_start =
> next_binding_table_offset;
> > +   next_binding_table_offset++;
> > +
> >     brw_assign_common_binding_table_offsets(devinfo, prog,
> &prog_data->base,
> >                                             next_binding_table_offset);
> >  }
> > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > index 73cae9ef7c..fa8851e2b4 100644
> > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > @@ -1634,7 +1634,7 @@ const struct brw_tracked_state
> brw_wm_image_surfaces = {
> >  };
> >
> >  static void
> > -brw_upload_cs_work_groups_surface(struct brw_context *brw)
> > +brw_upload_cs_variable_surfaces(struct brw_context *brw)
> >  {
> >     struct gl_context *ctx = &brw->ctx;
> >     /* _NEW_PROGRAM */
> > @@ -1671,6 +1671,29 @@ brw_upload_cs_work_groups_surface(struct
> brw_context *brw)
> >                                      RELOC_WRITE);
> >        brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
> >     }
> > +
> > +   if (prog && cs_prog_data->uses_variable_group_size) {
> > +      const unsigned surf_idx =
> > +         cs_prog_data->binding_table.work_group_size_start;
> > +      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
> > +      struct brw_bo *bo;
> > +      uint32_t bo_offset;
> > +
> > +      bo = NULL;
> > +      brw_upload_data(&brw->upload,
> > +                     (void *)brw->compute.group_size,
> > +                      3 * sizeof(GLuint),
> > +                      sizeof(GLuint),
> > +                      &bo,
> > +                      &bo_offset);
> > +
> > +      brw_emit_buffer_surface_state(brw, surf_offset,
> > +                                    bo, bo_offset,
> > +                                    ISL_FORMAT_RAW,
> > +                                    3 * sizeof(GLuint), 1,
> > +                                    RELOC_WRITE);
> > +      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
> > +   }
> >  }
> >
> >  const struct brw_tracked_state brw_cs_work_groups_surface = {
> > @@ -1678,5 +1701,5 @@ const struct brw_tracked_state
> brw_cs_work_groups_surface = {
> >        .brw = BRW_NEW_CS_PROG_DATA |
> >               BRW_NEW_CS_WORK_GROUPS
> >     },
> > -   .emit = brw_upload_cs_work_groups_surface,
> > +   .emit = brw_upload_cs_variable_surfaces,
> >  };
> > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> > index 5a9369d7b4..f213360ed8 100644
> > --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> > @@ -258,6 +258,7 @@ intelInitExtensions(struct gl_context *ctx)
> >              ctx->Extensions.ARB_compute_shader = true;
> >              ctx->Extensions.ARB_ES3_1_compatibility =
> >                 devinfo->gen >= 8 || devinfo->is_haswell;
> > +            ctx->Extensions.ARB_compute_variable_group_size = true;
> >           }
> >
> >           if (can_do_predicate_writes(brw->screen)) {
> > --
> > 2.11.0
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180628/101a2f5b/attachment-0001.html>


More information about the mesa-dev mailing list