[Mesa-dev] [PATCH v2 2/2] i965: Implement ARB_compute_variable_group_size.
Karol Herbst
kherbst at redhat.com
Thu Nov 8 21:31:48 UTC 2018
any update on that?
I could take care of extracting the bits myself, I was just expecting
that you would take care of that.
On Thu, Jun 28, 2018 at 6:53 PM Manolova, Plamena
<plamena.manolova at intel.com> wrote:
>
> Hi Karol,
> Thank you for reviewing! I'll go ahead and push the changes you need from
> nir_lower_system_values.c to master.
>
> Thank you,
> Pam
>
> On Thu, Jun 28, 2018 at 5:50 AM, Karol Herbst <kherbst at redhat.com> wrote:
>>
>> Hi,
>>
>> if the changes inside "src/compiler/nir/nir_lower_system_values.c" are
>> extracted into a seperate patch, this patch with the equal changes
>> would be
>>
>> Reviewed-by: Karol Herbst <kherbst at redhat.com>
>>
>> I would need that for a nir to codegen pass for Nouveau and maybe it
>> will help other drivers implementing this extension as well. I don't
>> think it would hurt to extract those, right?
>>
>> Thanks!
>>
>> On Thu, Jun 7, 2018 at 5:34 PM, Plamena Manolova
>> <plamena.n.manolova at gmail.com> wrote:
>> > This patch adds the implementation of ARB_compute_variable_group_size
>> > for i965. We do this by storing the group size in a buffer surface,
>> > similarly to the work group number.
>> >
>> > v2: Fix some indentation inconsistencies (Jordan, Ilia)
>> > Do DIV_ROUND_UP correctly in brw_nir_lower_cs_intrinsics.c (Jordan)
>> > Use alphabetical order in features.txt (Matt)
>> > Set the extension constants properly in brw_context.c
>> >
>> > Signed-off-by: Plamena Manolova <plamena.n.manolova at gmail.com>
>> > ---
>> > docs/features.txt | 2 +-
>> > docs/relnotes/18.2.0.html | 1 +
>> > src/compiler/nir/nir_lower_system_values.c | 13 ++++
>> > src/intel/compiler/brw_compiler.h | 2 +
>> > src/intel/compiler/brw_fs.cpp | 45 ++++++++----
>> > src/intel/compiler/brw_fs_nir.cpp | 20 ++++++
>> > src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 88 +++++++++++++++++-------
>> > src/mesa/drivers/dri/i965/brw_compute.c | 25 ++++++-
>> > src/mesa/drivers/dri/i965/brw_context.c | 6 ++
>> > src/mesa/drivers/dri/i965/brw_context.h | 1 +
>> > src/mesa/drivers/dri/i965/brw_cs.c | 4 ++
>> > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 27 +++++++-
>> > src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
>> > 13 files changed, 193 insertions(+), 42 deletions(-)
>> >
>> > diff --git a/docs/features.txt b/docs/features.txt
>> > index ed4050cf98..81b6663288 100644
>> > --- a/docs/features.txt
>> > +++ b/docs/features.txt
>> > @@ -298,7 +298,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
>> >
>> > GL_ARB_bindless_texture DONE (nvc0, radeonsi)
>> > GL_ARB_cl_event not started
>> > - GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi)
>> > + GL_ARB_compute_variable_group_size DONE (i965, nvc0, radeonsi)
>> > GL_ARB_ES3_2_compatibility DONE (i965/gen8+)
>> > GL_ARB_fragment_shader_interlock DONE (i965)
>> > GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe)
>> > diff --git a/docs/relnotes/18.2.0.html b/docs/relnotes/18.2.0.html
>> > index 0db37b620d..7475a56633 100644
>> > --- a/docs/relnotes/18.2.0.html
>> > +++ b/docs/relnotes/18.2.0.html
>> > @@ -52,6 +52,7 @@ Note: some of the new features are only available with certain drivers.
>> >
>> > <ul>
>> > <li>GL_ARB_fragment_shader_interlock on i965</li>
>> > +<li>GL_ARB_compute_variable_group_size on i965</li>
>> > </ul>
>> >
>> > <h2>Bug fixes</h2>
>> > diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
>> > index 487da04262..7ab005b000 100644
>> > --- a/src/compiler/nir/nir_lower_system_values.c
>> > +++ b/src/compiler/nir/nir_lower_system_values.c
>> > @@ -57,6 +57,14 @@ convert_block(nir_block *block, nir_builder *b)
>> > * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
>> > */
>> >
>> > + /*
>> > + * If the local work group size is variable we can't lower the global
>> > + * invocation id here.
>> > + */
>> > + if (b->shader->info.cs.local_size_variable) {
>> > + break;
>> > + }
>> > +
>> > nir_const_value local_size;
>> > memset(&local_size, 0, sizeof(local_size));
>> > local_size.u32[0] = b->shader->info.cs.local_size[0];
>> > @@ -102,6 +110,11 @@ convert_block(nir_block *block, nir_builder *b)
>> > }
>> >
>> > case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
>> > + /* If the local work group size is variable we can't lower it here */
>> > + if (b->shader->info.cs.local_size_variable) {
>> > + break;
>> > + }
>> > +
>> > nir_const_value local_size;
>> > memset(&local_size, 0, sizeof(local_size));
>> > local_size.u32[0] = b->shader->info.cs.local_size[0];
>> > diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
>> > index 8b4e6fe2e2..f54952c28f 100644
>> > --- a/src/intel/compiler/brw_compiler.h
>> > +++ b/src/intel/compiler/brw_compiler.h
>> > @@ -759,6 +759,7 @@ struct brw_cs_prog_data {
>> > unsigned threads;
>> > bool uses_barrier;
>> > bool uses_num_work_groups;
>> > + bool uses_variable_group_size;
>> >
>> > struct {
>> > struct brw_push_const_block cross_thread;
>> > @@ -771,6 +772,7 @@ struct brw_cs_prog_data {
>> > * surface indices the CS-specific surfaces
>> > */
>> > uint32_t work_groups_start;
>> > + uint32_t work_group_size_start;
>> > /** @} */
>> > } binding_table;
>> > };
>> > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
>> > index d67c0a4192..28730af47b 100644
>> > --- a/src/intel/compiler/brw_fs.cpp
>> > +++ b/src/intel/compiler/brw_fs.cpp
>> > @@ -7228,18 +7228,32 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
>> > int shader_time_index,
>> > char **error_str)
>> > {
>> > - prog_data->local_size[0] = src_shader->info.cs.local_size[0];
>> > - prog_data->local_size[1] = src_shader->info.cs.local_size[1];
>> > - prog_data->local_size[2] = src_shader->info.cs.local_size[2];
>> > - unsigned local_workgroup_size =
>> > - src_shader->info.cs.local_size[0] * src_shader->info.cs.local_size[1] *
>> > - src_shader->info.cs.local_size[2];
>> > -
>> > - unsigned min_dispatch_width =
>> > - DIV_ROUND_UP(local_workgroup_size, compiler->devinfo->max_cs_threads);
>> > - min_dispatch_width = MAX2(8, min_dispatch_width);
>> > - min_dispatch_width = util_next_power_of_two(min_dispatch_width);
>> > - assert(min_dispatch_width <= 32);
>> > + unsigned min_dispatch_width;
>> > +
>> > + if (!src_shader->info.cs.local_size_variable) {
>> > + unsigned local_workgroup_size =
>> > + src_shader->info.cs.local_size[0] * src_shader->info.cs.local_size[1] *
>> > + src_shader->info.cs.local_size[2];
>> > +
>> > + min_dispatch_width =
>> > + DIV_ROUND_UP(local_workgroup_size, compiler->devinfo->max_cs_threads);
>> > + min_dispatch_width = MAX2(8, min_dispatch_width);
>> > + min_dispatch_width = util_next_power_of_two(min_dispatch_width);
>> > + assert(min_dispatch_width <= 32);
>> > +
>> > + prog_data->local_size[0] = src_shader->info.cs.local_size[0];
>> > + prog_data->local_size[1] = src_shader->info.cs.local_size[1];
>> > + prog_data->local_size[2] = src_shader->info.cs.local_size[2];
>> > + prog_data->uses_variable_group_size = false;
>> > + } else {
>> > + /*
>> > + * If the local work group size is variable we have to use a dispatch
>> > + * width of 32 here, since at this point we don't know the actual size of
>> > + * the workload.
>> > + */
>> > + min_dispatch_width = 32;
>> > + prog_data->uses_variable_group_size = true;
>> > + }
>> >
>> > fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL;
>> > cfg_t *cfg = NULL;
>> > @@ -7324,7 +7338,12 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
>> > }
>> > } else {
>> > cfg = v32->cfg;
>> > - cs_set_simd_size(prog_data, 32);
>> > + if (!src_shader->info.cs.local_size_variable) {
>> > + cs_set_simd_size(prog_data, 32);
>> > + } else {
>> > + prog_data->simd_size = 32;
>> > + prog_data->threads = compiler->devinfo->max_cs_threads;
>> > + }
>> > cs_fill_push_const_info(compiler->devinfo, prog_data);
>> > promoted_constants = v32->promoted_constants;
>> > }
>> > diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
>> > index 166da0aa6d..c4948c2347 100644
>> > --- a/src/intel/compiler/brw_fs_nir.cpp
>> > +++ b/src/intel/compiler/brw_fs_nir.cpp
>> > @@ -3766,6 +3766,26 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
>> > break;
>> > }
>> >
>> > + case nir_intrinsic_load_local_group_size: {
>> > + const unsigned surface =
>> > + cs_prog_data->binding_table.work_group_size_start;
>> > +
>> > + fs_reg surf_index = brw_imm_ud(surface);
>> > + brw_mark_surface_used(prog_data, surface);
>> > +
>> > + /* Read the 3 GLuint components of gl_NumWorkGroups */
>> > + for (unsigned i = 0; i < 3; i++) {
>> > + fs_reg read_result =
>> > + emit_untyped_read(bld, surf_index,
>> > + brw_imm_ud(i << 2),
>> > + 1 /* dims */, 1 /* size */,
>> > + BRW_PREDICATE_NONE);
>> > + read_result.type = dest.type;
>> > + bld.MOV(dest, read_result);
>> > + dest = offset(dest, bld, 1);
>> > + }
>> > + break;
>> > + }
>> > default:
>> > nir_emit_intrinsic(bld, instr);
>> > break;
>> > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
>> > index bfbdea0e8f..096e86db19 100644
>> > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
>> > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
>> > @@ -58,10 +58,12 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
>> > * cs_thread_local_id + subgroup_invocation;
>> > */
>> > nir_ssa_def *subgroup_id;
>> > - if (state->local_workgroup_size <= state->dispatch_width)
>> > + if ((state->local_workgroup_size <= state->dispatch_width) &&
>> > + !state->nir->info.cs.local_size_variable) {
>> > subgroup_id = nir_imm_int(b, 0);
>> > - else
>> > + } else {
>> > subgroup_id = nir_load_subgroup_id(b);
>> > + }
>> >
>> > nir_ssa_def *thread_local_id =
>> > nir_imul(b, subgroup_id, nir_imm_int(b, state->dispatch_width));
>> > @@ -84,43 +86,81 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
>> > * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
>> > * gl_WorkGroupSize.z;
>> > */
>> > - unsigned *size = nir->info.cs.local_size;
>> > -
>> > nir_ssa_def *local_index = nir_load_local_invocation_index(b);
>> > -
>> > - nir_const_value uvec3;
>> > - memset(&uvec3, 0, sizeof(uvec3));
>> > - uvec3.u32[0] = 1;
>> > - uvec3.u32[1] = size[0];
>> > - uvec3.u32[2] = size[0] * size[1];
>> > - nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
>> > - uvec3.u32[0] = size[0];
>> > - uvec3.u32[1] = size[1];
>> > - uvec3.u32[2] = size[2];
>> > - nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
>> > -
>> > - sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val);
>> > + if (!state->nir->info.cs.local_size_variable) {
>> > + unsigned *size = nir->info.cs.local_size;
>> > +
>> > + nir_const_value uvec3;
>> > + memset(&uvec3, 0, sizeof(uvec3));
>> > + uvec3.u32[0] = 1;
>> > + uvec3.u32[1] = size[0];
>> > + uvec3.u32[2] = size[0] * size[1];
>> > + nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
>> > + uvec3.u32[0] = size[0];
>> > + uvec3.u32[1] = size[1];
>> > + uvec3.u32[2] = size[2];
>> > + nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
>> > +
>> > + sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val);
>> > + } else {
>> > + nir_ssa_def *group_size_xyz = nir_load_local_group_size(b);
>> > + nir_ssa_def *group_size_x = nir_channel(b, group_size_xyz, 0);
>> > + nir_ssa_def *group_size_y = nir_channel(b, group_size_xyz, 1);
>> > + nir_ssa_def *group_size_z = nir_channel(b, group_size_xyz, 2);
>> > + nir_ssa_def *result[3];
>> > + result[0] = nir_umod(b, local_index, group_size_x);
>> > + result[1] = nir_umod(b, nir_udiv(b, local_index, group_size_x),
>> > + group_size_y);
>> > + result[2] = nir_umod(b, nir_udiv(b, local_index,
>> > + nir_umul_high(b, group_size_x, group_size_y)), group_size_z);
>> > +
>> > + sysval = nir_vec(b, result, 3);
>> > + }
>> > break;
>> > }
>> >
>> > case nir_intrinsic_load_subgroup_id:
>> > - if (state->local_workgroup_size > 8)
>> > + if (state->local_workgroup_size > 8 ||
>> > + state->nir->info.cs.local_size_variable) {
>> > continue;
>> > + }
>> >
>> > /* For small workgroup sizes, we know subgroup_id will be zero */
>> > sysval = nir_imm_int(b, 0);
>> > break;
>> >
>> > case nir_intrinsic_load_num_subgroups: {
>> > - unsigned local_workgroup_size =
>> > - nir->info.cs.local_size[0] * nir->info.cs.local_size[1] *
>> > - nir->info.cs.local_size[2];
>> > - unsigned num_subgroups =
>> > - DIV_ROUND_UP(local_workgroup_size, state->dispatch_width);
>> > - sysval = nir_imm_int(b, num_subgroups);
>> > + if (!state->nir->info.cs.local_size_variable) {
>> > + unsigned num_subgroups;
>> > + unsigned local_workgroup_size =
>> > + nir->info.cs.local_size[0] * nir->info.cs.local_size[1] *
>> > + nir->info.cs.local_size[2];
>> > + num_subgroups =
>> > + DIV_ROUND_UP(local_workgroup_size, state->dispatch_width);
>> > + sysval = nir_imm_int(b, num_subgroups);
>> > + } else {
>> > + nir_ssa_def *group_size_xyz = nir_load_local_group_size(b);
>> > + nir_ssa_def *group_size_x = nir_channel(b, group_size_xyz, 0);
>> > + nir_ssa_def *group_size_y = nir_channel(b, group_size_xyz, 1);
>> > + nir_ssa_def *group_size_z = nir_channel(b, group_size_xyz, 2);
>> > + nir_ssa_def *group_size = nir_imul(b, group_size_x, nir_imul(b,
>> > + group_size_y, group_size_z));
>> > + nir_ssa_def *dispatch_width = nir_imm_int(b,
>> > + state->dispatch_width - 1);
>> > +
>> > + sysval = nir_udiv(b, group_size, dispatch_width);
>> > + }
>> > break;
>> > }
>> >
>> > + case nir_intrinsic_load_global_invocation_id: {
>> > + nir_ssa_def *group_id = nir_load_work_group_id(b);
>> > + nir_ssa_def *local_id = nir_load_local_invocation_id(b);
>> > + nir_ssa_def *group_size = nir_load_local_group_size(b);
>> > +
>> > + sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);
>> > + break;
>> > + }
>> > default:
>> > continue;
>> > }
>> > diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
>> > index de08fc3ac1..7949e0ff51 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_compute.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_compute.c
>> > @@ -121,8 +121,11 @@ brw_emit_gpgpu_walker(struct brw_context *brw)
>> > }
>> >
>> > const unsigned simd_size = prog_data->simd_size;
>> > - unsigned group_size = prog_data->local_size[0] *
>> > - prog_data->local_size[1] * prog_data->local_size[2];
>> > + unsigned group_size = brw->compute.group_size != NULL ?
>> > + brw->compute.group_size[0] * brw->compute.group_size[1] *
>> > + brw->compute.group_size[2] : prog_data->local_size[0] *
>> > + prog_data->local_size[1] * prog_data->local_size[2];
>> > +
>> > unsigned thread_width_max =
>> > (group_size + simd_size - 1) / simd_size;
>> >
>> > @@ -229,6 +232,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
>> >
>> > brw->compute.num_work_groups_bo = NULL;
>> > brw->compute.num_work_groups = num_groups;
>> > + brw->compute.group_size = NULL;
>> > ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
>> >
>> > brw_dispatch_compute_common(ctx);
>> > @@ -248,6 +252,22 @@ brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
>> > brw->compute.num_work_groups_bo = bo;
>> > brw->compute.num_work_groups_offset = indirect;
>> > brw->compute.num_work_groups = indirect_group_counts;
>> > + brw->compute.group_size = NULL;
>> > + ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
>> > +
>> > + brw_dispatch_compute_common(ctx);
>> > +}
>> > +
>> > +static void
>> > +brw_dispatch_compute_group_size(struct gl_context *ctx,
>> > + const GLuint *num_groups,
>> > + const GLuint *group_size)
>> > +{
>> > + struct brw_context *brw = brw_context(ctx);
>> > +
>> > + brw->compute.num_work_groups_bo = NULL;
>> > + brw->compute.num_work_groups = num_groups;
>> > + brw->compute.group_size = group_size;
>> > ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
>> >
>> > brw_dispatch_compute_common(ctx);
>> > @@ -258,4 +278,5 @@ brw_init_compute_functions(struct dd_function_table *functions)
>> > {
>> > functions->DispatchCompute = brw_dispatch_compute;
>> > functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
>> > + functions->DispatchComputeGroupSize = brw_dispatch_compute_group_size;
>> > }
>> > diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
>> > index 9ced230ec1..25d354e155 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_context.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_context.c
>> > @@ -766,6 +766,12 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
>> > ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
>> > ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
>> > ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
>> > +
>> > + /* ARB_compute_variable_group_size constants */
>> > + ctx->Const.MaxComputeVariableGroupSize[0] = max_invocations;
>> > + ctx->Const.MaxComputeVariableGroupSize[1] = max_invocations;
>> > + ctx->Const.MaxComputeVariableGroupSize[2] = max_invocations;
>> > + ctx->Const.MaxComputeVariableGroupInvocations = max_invocations;
>> > }
>> >
>> > /**
>> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>> > index 2613b9fda2..0fb533c369 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_context.h
>> > +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> > @@ -931,6 +931,7 @@ struct brw_context
>> > struct brw_bo *num_work_groups_bo;
>> > GLintptr num_work_groups_offset;
>> > const GLuint *num_work_groups;
>> > + const GLuint *group_size;
>> > } compute;
>> >
>> > struct {
>> > diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
>> > index e3f8fc67a4..007273390b 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_cs.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_cs.c
>> > @@ -43,6 +43,10 @@ assign_cs_binding_table_offsets(const struct gen_device_info *devinfo,
>> > prog_data->binding_table.work_groups_start = next_binding_table_offset;
>> > next_binding_table_offset++;
>> >
>> > + /* May not be used if the work group size is not variable. */
>> > + prog_data->binding_table.work_group_size_start = next_binding_table_offset;
>> > + next_binding_table_offset++;
>> > +
>> > brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
>> > next_binding_table_offset);
>> > }
>> > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> > index 73cae9ef7c..fa8851e2b4 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> > @@ -1634,7 +1634,7 @@ const struct brw_tracked_state brw_wm_image_surfaces = {
>> > };
>> >
>> > static void
>> > -brw_upload_cs_work_groups_surface(struct brw_context *brw)
>> > +brw_upload_cs_variable_surfaces(struct brw_context *brw)
>> > {
>> > struct gl_context *ctx = &brw->ctx;
>> > /* _NEW_PROGRAM */
>> > @@ -1671,6 +1671,29 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw)
>> > RELOC_WRITE);
>> > brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
>> > }
>> > +
>> > + if (prog && cs_prog_data->uses_variable_group_size) {
>> > + const unsigned surf_idx =
>> > + cs_prog_data->binding_table.work_group_size_start;
>> > + uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
>> > + struct brw_bo *bo;
>> > + uint32_t bo_offset;
>> > +
>> > + bo = NULL;
>> > + brw_upload_data(&brw->upload,
>> > + (void *)brw->compute.group_size,
>> > + 3 * sizeof(GLuint),
>> > + sizeof(GLuint),
>> > + &bo,
>> > + &bo_offset);
>> > +
>> > + brw_emit_buffer_surface_state(brw, surf_offset,
>> > + bo, bo_offset,
>> > + ISL_FORMAT_RAW,
>> > + 3 * sizeof(GLuint), 1,
>> > + RELOC_WRITE);
>> > + brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
>> > + }
>> > }
>> >
>> > const struct brw_tracked_state brw_cs_work_groups_surface = {
>> > @@ -1678,5 +1701,5 @@ const struct brw_tracked_state brw_cs_work_groups_surface = {
>> > .brw = BRW_NEW_CS_PROG_DATA |
>> > BRW_NEW_CS_WORK_GROUPS
>> > },
>> > - .emit = brw_upload_cs_work_groups_surface,
>> > + .emit = brw_upload_cs_variable_surfaces,
>> > };
>> > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
>> > index 5a9369d7b4..f213360ed8 100644
>> > --- a/src/mesa/drivers/dri/i965/intel_extensions.c
>> > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
>> > @@ -258,6 +258,7 @@ intelInitExtensions(struct gl_context *ctx)
>> > ctx->Extensions.ARB_compute_shader = true;
>> > ctx->Extensions.ARB_ES3_1_compatibility =
>> > devinfo->gen >= 8 || devinfo->is_haswell;
>> > + ctx->Extensions.ARB_compute_variable_group_size = true;
>> > }
>> >
>> > if (can_do_predicate_writes(brw->screen)) {
>> > --
>> > 2.11.0
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev at lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
More information about the mesa-dev
mailing list