[Mesa-dev] [PATCH] i965: Drop mark_surface_used mechanism.
Jason Ekstrand
jason at jlekstrand.net
Sun Dec 30 13:46:57 UTC 2018
Finally! I haven't read the patch but whole-heartedly
Acked-by: Jason Ekstrand <jason at jlekstrand.net>
On December 30, 2018 00:01:46 Kenneth Graunke <kenneth at whitecape.org> wrote:
> The original idea was that the backend compiler could eliminate
> surfaces, so we would have it mark which ones are actually used,
> then shrink the binding table accordingly. Unfortunately, it's a
> pretty blunt mechanism - it can only prune things from the end,
> not the middle - since we decide the layout before we even start
> the backend compiler, and only limit the size. It also basically
> gives up if it sees indirect array access.
>
> Besides, we do the vast majority of our surface elimination in NIR
> anyway, not the backend - and I don't see that trend changing any
> time soon. Vulkan abandoned this plan a long time ago, and I don't
> use it in Iris, but it's still been kicking around in i965.
> ---
> src/intel/compiler/brw_compiler.h | 13 --------
> src/intel/compiler/brw_fs.cpp | 4 ---
> src/intel/compiler/brw_fs_generator.cpp | 11 -------
> src/intel/compiler/brw_fs_nir.cpp | 32 -------------------
> src/intel/compiler/brw_vec4_generator.cpp | 9 ------
> src/intel/compiler/brw_vec4_nir.cpp | 29 -----------------
> src/intel/compiler/brw_vec4_visitor.cpp | 2 --
> .../drivers/dri/i965/brw_nir_uniforms.cpp | 4 ---
> src/mesa/drivers/dri/i965/brw_program.c | 5 ++-
> src/mesa/drivers/dri/i965/brw_wm.c | 3 ++
> 10 files changed, 7 insertions(+), 105 deletions(-)
>
> I thought this would let me eliminate nir_tex_instr::texture_array_size
> too, but apparently we use that for the used_by_txf sRGB shenanigans.
>
> Ultimately, I'd like to start using nir_lower_samplers_as_deref instead
> of nir_lower_samplers, and this is one step towards that.
>
> diff --git a/src/intel/compiler/brw_compiler.h
> b/src/intel/compiler/brw_compiler.h
> index e4f4d83c8e8..61a4528d372 100644
> --- a/src/intel/compiler/brw_compiler.h
> +++ b/src/intel/compiler/brw_compiler.h
> @@ -643,19 +643,6 @@ brw_stage_prog_data_add_params(struct
> brw_stage_prog_data *prog_data,
> return prog_data->param + old_nr_params;
> }
>
> -static inline void
> -brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
> - unsigned surf_index)
> -{
> - /* A binding table index is 8 bits and the top 3 values are reserved for
> - * special things (stateless and SLM).
> - */
> - assert(surf_index <= 252);
> -
> - prog_data->binding_table.size_bytes =
> - MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
> -}
> -
> enum brw_barycentric_mode {
> BRW_BARYCENTRIC_PERSPECTIVE_PIXEL = 0,
> BRW_BARYCENTRIC_PERSPECTIVE_CENTROID = 1,
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 590a9b32a8e..d5f70e6a2f1 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -2376,8 +2376,6 @@ fs_visitor::lower_constant_loads()
> inst->src[i].nr = dst.nr;
> inst->src[i].offset = (base & (block_sz - 1)) +
> inst->src[i].offset % 4;
> -
> - brw_mark_surface_used(prog_data, index);
> }
>
> if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
> @@ -2391,8 +2389,6 @@ fs_visitor::lower_constant_loads()
> inst->src[1],
> pull_index * 4);
> inst->remove(block);
> -
> - brw_mark_surface_used(prog_data, index);
> }
> }
> invalidate_live_intervals();
> diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> index 08dd83dded7..97a1760528c 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -315,8 +315,6 @@ fs_generator::fire_fb_write(fs_inst *inst,
>
> if (devinfo->gen >= 6)
> brw_inst_set_rt_slot_group(devinfo, insn, inst->group / 16);
> -
> - brw_mark_surface_used(&prog_data->base, surf_index);
> }
>
> void
> @@ -373,8 +371,6 @@ fs_generator::generate_fb_read(fs_inst *inst, struct
> brw_reg dst,
> gen9_fb_READ(p, dst, payload, surf_index,
> inst->header_size, inst->size_written / REG_SIZE,
> prog_data->persample_dispatch);
> -
> - brw_mark_surface_used(&prog_data->base, surf_index);
> }
>
> void
> @@ -872,8 +868,6 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
> inst->header_size > 0,
> simd_mode,
> BRW_SAMPLER_RETURN_FORMAT_SINT32);
> -
> - brw_mark_surface_used(prog_data, surf_index.ud);
> }
>
> void
> @@ -1158,8 +1152,6 @@ fs_generator::generate_tex(fs_inst *inst, struct
> brw_reg dst, struct brw_reg src
> inst->header_size != 0,
> simd_mode,
> return_format);
> -
> - brw_mark_surface_used(prog_data, surface + base_binding_table_index);
> } else {
> /* Non-const sampler index */
>
> @@ -1752,9 +1744,6 @@ fs_generator::generate_shader_time_add(fs_inst *,
> brw_shader_time_add(p, payload,
> prog_data->binding_table.shader_time_start);
> brw_pop_insn_state(p);
> -
> - brw_mark_surface_used(prog_data,
> - prog_data->binding_table.shader_time_start);
> }
>
> void
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 92ec85a27cc..b62dfe0f120 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -2939,10 +2939,6 @@ fs_visitor::emit_non_coherent_fb_read(const
> fs_builder &bld, const fs_reg &dst,
> wm_prog_data->binding_table.render_target_read_start -
> wm_prog_data->base.binding_table.texture_start;
>
> - brw_mark_surface_used(
> - bld.shader->stage_prog_data,
> - wm_prog_data->binding_table.render_target_read_start + target);
> -
> /* Calculate the fragment coordinates. */
> const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
> bld.MOV(offset(coords, bld, 0), pixel_x);
> @@ -3424,7 +3420,6 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
> cs_prog_data->uses_num_work_groups = true;
>
> fs_reg surf_index = brw_imm_ud(surface);
> - brw_mark_surface_used(prog_data, surface);
>
> /* Read the 3 GLuint components of gl_NumWorkGroups */
> for (unsigned i = 0; i < 3; i++) {
> @@ -3644,18 +3639,10 @@ fs_visitor::get_nir_ssbo_intrinsic_index(const
> brw::fs_builder &bld,
> unsigned index = stage_prog_data->binding_table.ssbo_start +
> nir_src_as_uint(instr->src[src]);
> surf_index = brw_imm_ud(index);
> - brw_mark_surface_used(prog_data, index);
> } else {
> surf_index = vgrf(glsl_type::uint_type);
> bld.ADD(surf_index, get_nir_src(instr->src[src]),
> brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
> -
> - /* Assume this may touch any UBO. It would be nice to provide
> - * a tighter bound, but the array information is already lowered away.
> - */
> - brw_mark_surface_used(prog_data,
> - stage_prog_data->binding_table.ssbo_start +
> - nir->info.num_ssbos - 1);
> }
>
> return surf_index;
> @@ -3941,7 +3928,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr
> const unsigned index = stage_prog_data->binding_table.ubo_start +
> nir_src_as_uint(instr->src[0]);
> surf_index = brw_imm_ud(index);
> - brw_mark_surface_used(prog_data, index);
> } else {
> /* The block index is not a constant. Evaluate the index expression
> * per-channel and add the base UBO index; we have to select a value
> @@ -3951,13 +3937,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder
> &bld, nir_intrinsic_instr *instr
> bld.ADD(surf_index, get_nir_src(instr->src[0]),
> brw_imm_ud(stage_prog_data->binding_table.ubo_start));
> surf_index = bld.emit_uniformize(surf_index);
> -
> - /* Assume this may touch any UBO. It would be nice to provide
> - * a tighter bound, but the array information is already lowered
> away.
> - */
> - brw_mark_surface_used(prog_data,
> - stage_prog_data->binding_table.ubo_start +
> - nir->info.num_ubos - 1);
> }
>
> if (!nir_src_is_const(instr->src[1])) {
> @@ -4214,8 +4193,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr
> ubld.ADD(buffer_size, size_aligned4, negate(size_padding));
>
> bld.MOV(retype(dest, ret_payload.type), component(buffer_size, 0));
> -
> - brw_mark_surface_used(prog_data, index);
> break;
> }
>
> @@ -4826,15 +4803,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld,
> nir_tex_instr *instr)
> unreachable("should be lowered");
>
> case nir_tex_src_texture_offset: {
> - /* Figure out the highest possible texture index and mark it as
> used */
> - uint32_t max_used = texture + instr->texture_array_size - 1;
> - if (instr->op == nir_texop_tg4 && devinfo->gen < 8) {
> - max_used += stage_prog_data->binding_table.gather_texture_start;
> - } else {
> - max_used += stage_prog_data->binding_table.texture_start;
> - }
> - brw_mark_surface_used(prog_data, max_used);
> -
> /* Emit code to evaluate the actual indexing expression */
> fs_reg tmp = vgrf(glsl_type::uint_type);
> bld.ADD(tmp, src, brw_imm_ud(texture));
> diff --git a/src/intel/compiler/brw_vec4_generator.cpp
> b/src/intel/compiler/brw_vec4_generator.cpp
> index 888cb358fd1..93baaef3ab7 100644
> --- a/src/intel/compiler/brw_vec4_generator.cpp
> +++ b/src/intel/compiler/brw_vec4_generator.cpp
> @@ -291,8 +291,6 @@ generate_tex(struct brw_codegen *p,
> inst->header_size != 0,
> BRW_SAMPLER_SIMD_MODE_SIMD4X2,
> return_format);
> -
> - brw_mark_surface_used(&prog_data->base, sampler +
> base_binding_table_index);
> } else {
> /* Non-constant sampler index. */
>
> @@ -1351,8 +1349,6 @@ generate_get_buffer_size(struct brw_codegen *p,
> inst->header_size > 0,
> BRW_SAMPLER_SIMD_MODE_SIMD4X2,
> BRW_SAMPLER_RETURN_FORMAT_SINT32);
> -
> - brw_mark_surface_used(&prog_data->base, surf_index.ud);
> }
>
> static void
> @@ -1378,9 +1374,6 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
> 0, /* LD message ignores sampler unit */
> GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
> BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0));
> -
> - brw_mark_surface_used(&prog_data->base, surf_index.ud);
> -
> } else {
>
> struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
> @@ -1866,8 +1859,6 @@ generate_code(struct brw_codegen *p,
> case SHADER_OPCODE_SHADER_TIME_ADD:
> brw_shader_time_add(p, src[0],
> prog_data->base.binding_table.shader_time_start);
> - brw_mark_surface_used(&prog_data->base,
> -
> prog_data->base.binding_table.shader_time_start);
> break;
>
> case SHADER_OPCODE_UNTYPED_ATOMIC:
> diff --git a/src/intel/compiler/brw_vec4_nir.cpp
> b/src/intel/compiler/brw_vec4_nir.cpp
> index f8d4dfb4682..98632b5af08 100644
> --- a/src/intel/compiler/brw_vec4_nir.cpp
> +++ b/src/intel/compiler/brw_vec4_nir.cpp
> @@ -387,16 +387,11 @@
> vec4_visitor::get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr)
> unsigned index = prog_data->base.binding_table.ssbo_start +
> nir_src_as_uint(instr->src[src]);
> surf_index = brw_imm_ud(index);
> - brw_mark_surface_used(&prog_data->base, index);
> } else {
> surf_index = src_reg(this, glsl_type::uint_type);
> emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[src], 1),
> brw_imm_ud(prog_data->base.binding_table.ssbo_start)));
> surf_index = emit_uniformize(surf_index);
> -
> - brw_mark_surface_used(&prog_data->base,
> - prog_data->base.binding_table.ssbo_start +
> - nir->info.num_ssbos - 1);
> }
>
> return surf_index;
> @@ -492,8 +487,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr
> *instr)
> emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
>
> emit(inst);
> -
> - brw_mark_surface_used(&prog_data->base, index);
> break;
> }
>
> @@ -700,7 +693,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr
> *instr)
> const unsigned index = prog_data->base.binding_table.ubo_start +
> nir_src_as_uint(instr->src[0]);
> surf_index = brw_imm_ud(index);
> - brw_mark_surface_used(&prog_data->base, index);
> } else {
> /* The block index is not a constant. Evaluate the index expression
> * per-channel and add the base UBO index; we have to select a value
> @@ -711,13 +703,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr
> *instr)
> instr->num_components),
> brw_imm_ud(prog_data->base.binding_table.ubo_start)));
> surf_index = emit_uniformize(surf_index);
> -
> - /* Assume this may touch any UBO. It would be nice to provide
> - * a tighter bound, but the array information is already lowered
> away.
> - */
> - brw_mark_surface_used(&prog_data->base,
> - prog_data->base.binding_table.ubo_start +
> - nir->info.num_ubos - 1);
> }
>
> src_reg offset_reg;
> @@ -2031,20 +2016,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
> }
>
> case nir_tex_src_texture_offset: {
> - /* The highest texture which may be used by this operation is
> - * the last element of the array. Mark it here, because the generator
> - * doesn't have enough information to determine the bound.
> - */
> - uint32_t array_size = instr->texture_array_size;
> - uint32_t max_used = texture + array_size - 1;
> - if (instr->op == nir_texop_tg4) {
> - max_used += prog_data->base.binding_table.gather_texture_start;
> - } else {
> - max_used += prog_data->base.binding_table.texture_start;
> - }
> -
> - brw_mark_surface_used(&prog_data->base, max_used);
> -
> /* Emit code to evaluate the actual indexing expression */
> src_reg src = get_nir_src(instr->src[i].src, 1);
> src_reg temp(this, glsl_type::uint_type);
> diff --git a/src/intel/compiler/brw_vec4_visitor.cpp
> b/src/intel/compiler/brw_vec4_visitor.cpp
> index aea64c5b54d..a8ff7f4da25 100644
> --- a/src/intel/compiler/brw_vec4_visitor.cpp
> +++ b/src/intel/compiler/brw_vec4_visitor.cpp
> @@ -1748,8 +1748,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t
> *block, vec4_instruction *inst,
> src = byte_offset(src, 16);
> }
>
> - brw_mark_surface_used(&prog_data->base, index);
> -
> if (is_64bit) {
> temp = retype(temp, BRW_REGISTER_TYPE_DF);
> shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst);
> diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> index 66cdc1a10b6..256fdd8fc79 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> @@ -109,10 +109,6 @@ brw_setup_image_uniform_values(gl_shader_stage stage,
> image_idx,
> offsetof(brw_image_param, swizzling), 2);
> param += BRW_IMAGE_PARAM_SIZE;
> -
> - brw_mark_surface_used(
> - stage_prog_data,
> - stage_prog_data->binding_table.image_start + image_idx);
> }
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c
> b/src/mesa/drivers/dri/i965/brw_program.c
> index 730d6dc0d5a..beccd8b3588 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -835,7 +835,10 @@ brw_assign_common_binding_table_offsets(const struct
> gen_device_info *devinfo,
> stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
> next_binding_table_offset += num_textures;
>
> - /* prog_data->base.binding_table.size will be set by
> brw_mark_surface_used. */
> + /* Set the binding table size. Some callers may append new entries
> + * and increase this accordingly.
> + */
> + stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
>
> assert(next_binding_table_offset <= BRW_MAX_SURFACES);
> return next_binding_table_offset;
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index 536e47638e6..7bbb6166344 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -63,6 +63,9 @@ assign_fs_binding_table_offsets(const struct
> gen_device_info *devinfo,
> next_binding_table_offset;
> next_binding_table_offset += key->nr_color_regions;
> }
> +
> + /* Update the binding table size */
> + prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4;
> }
>
> static void
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list