[Mesa-dev] [PATCH] i965: Drop mark_surface_used mechanism.

Jason Ekstrand jason at jlekstrand.net
Sun Dec 30 13:46:57 UTC 2018


Finally! I haven't read the patch but whole-heartedly

Acked-by: Jason Ekstrand <jason at jlekstrand.net>

On December 30, 2018 00:01:46 Kenneth Graunke <kenneth at whitecape.org> wrote:

> The original idea was that the backend compiler could eliminate
> surfaces, so we would have it mark which ones are actually used,
> then shrink the binding table accordingly.  Unfortunately, it's a
> pretty blunt mechanism - it can only prune things from the end,
> not the middle - since we decide the layout before we even start
> the backend compiler, and only limit the size.  It also basically
> gives up if it sees indirect array access.
>
> Besides, we do the vast majority of our surface elimination in NIR
> anyway, not the backend - and I don't see that trend changing any
> time soon.  Vulkan abandoned this plan a long time ago, and I don't
> use it in Iris, but it's still been kicking around in i965.
> ---
> src/intel/compiler/brw_compiler.h             | 13 --------
> src/intel/compiler/brw_fs.cpp                 |  4 ---
> src/intel/compiler/brw_fs_generator.cpp       | 11 -------
> src/intel/compiler/brw_fs_nir.cpp             | 32 -------------------
> src/intel/compiler/brw_vec4_generator.cpp     |  9 ------
> src/intel/compiler/brw_vec4_nir.cpp           | 29 -----------------
> src/intel/compiler/brw_vec4_visitor.cpp       |  2 --
> .../drivers/dri/i965/brw_nir_uniforms.cpp     |  4 ---
> src/mesa/drivers/dri/i965/brw_program.c       |  5 ++-
> src/mesa/drivers/dri/i965/brw_wm.c            |  3 ++
> 10 files changed, 7 insertions(+), 105 deletions(-)
>
> I thought this would let me eliminate nir_tex_instr::texture_array_size
> too, but apparently we use that for the used_by_txf sRGB shenanigans.
>
> Ultimately, I'd like to start using nir_lower_samplers_as_deref instead
> of nir_lower_samplers, and this is one step towards that.
>
> diff --git a/src/intel/compiler/brw_compiler.h 
> b/src/intel/compiler/brw_compiler.h
> index e4f4d83c8e8..61a4528d372 100644
> --- a/src/intel/compiler/brw_compiler.h
> +++ b/src/intel/compiler/brw_compiler.h
> @@ -643,19 +643,6 @@ brw_stage_prog_data_add_params(struct 
> brw_stage_prog_data *prog_data,
>    return prog_data->param + old_nr_params;
> }
>
> -static inline void
> -brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
> -                      unsigned surf_index)
> -{
> -   /* A binding table index is 8 bits and the top 3 values are reserved for
> -    * special things (stateless and SLM).
> -    */
> -   assert(surf_index <= 252);
> -
> -   prog_data->binding_table.size_bytes =
> -      MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
> -}
> -
> enum brw_barycentric_mode {
>    BRW_BARYCENTRIC_PERSPECTIVE_PIXEL       = 0,
>    BRW_BARYCENTRIC_PERSPECTIVE_CENTROID    = 1,
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 590a9b32a8e..d5f70e6a2f1 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -2376,8 +2376,6 @@ fs_visitor::lower_constant_loads()
>          inst->src[i].nr = dst.nr;
>          inst->src[i].offset = (base & (block_sz - 1)) +
>                                inst->src[i].offset % 4;
> -
> -         brw_mark_surface_used(prog_data, index);
>       }
>
>       if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
> @@ -2391,8 +2389,6 @@ fs_visitor::lower_constant_loads()
>                                     inst->src[1],
>                                     pull_index * 4);
>          inst->remove(block);
> -
> -         brw_mark_surface_used(prog_data, index);
>       }
>    }
>    invalidate_live_intervals();
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 08dd83dded7..97a1760528c 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -315,8 +315,6 @@ fs_generator::fire_fb_write(fs_inst *inst,
>
>    if (devinfo->gen >= 6)
>       brw_inst_set_rt_slot_group(devinfo, insn, inst->group / 16);
> -
> -   brw_mark_surface_used(&prog_data->base, surf_index);
> }
>
> void
> @@ -373,8 +371,6 @@ fs_generator::generate_fb_read(fs_inst *inst, struct 
> brw_reg dst,
>    gen9_fb_READ(p, dst, payload, surf_index,
>                 inst->header_size, inst->size_written / REG_SIZE,
>                 prog_data->persample_dispatch);
> -
> -   brw_mark_surface_used(&prog_data->base, surf_index);
> }
>
> void
> @@ -872,8 +868,6 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
>               inst->header_size > 0,
>               simd_mode,
>               BRW_SAMPLER_RETURN_FORMAT_SINT32);
> -
> -   brw_mark_surface_used(prog_data, surf_index.ud);
> }
>
> void
> @@ -1158,8 +1152,6 @@ fs_generator::generate_tex(fs_inst *inst, struct 
> brw_reg dst, struct brw_reg src
>                  inst->header_size != 0,
>                  simd_mode,
>                  return_format);
> -
> -      brw_mark_surface_used(prog_data, surface + base_binding_table_index);
>    } else {
>       /* Non-const sampler index */
>
> @@ -1752,9 +1744,6 @@ fs_generator::generate_shader_time_add(fs_inst *,
>    brw_shader_time_add(p, payload,
>                        prog_data->binding_table.shader_time_start);
>    brw_pop_insn_state(p);
> -
> -   brw_mark_surface_used(prog_data,
> -                         prog_data->binding_table.shader_time_start);
> }
>
> void
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index 92ec85a27cc..b62dfe0f120 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -2939,10 +2939,6 @@ fs_visitor::emit_non_coherent_fb_read(const 
> fs_builder &bld, const fs_reg &dst,
>       wm_prog_data->binding_table.render_target_read_start -
>       wm_prog_data->base.binding_table.texture_start;
>
> -   brw_mark_surface_used(
> -      bld.shader->stage_prog_data,
> -      wm_prog_data->binding_table.render_target_read_start + target);
> -
>    /* Calculate the fragment coordinates. */
>    const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
>    bld.MOV(offset(coords, bld, 0), pixel_x);
> @@ -3424,7 +3420,6 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
>       cs_prog_data->uses_num_work_groups = true;
>
>       fs_reg surf_index = brw_imm_ud(surface);
> -      brw_mark_surface_used(prog_data, surface);
>
>       /* Read the 3 GLuint components of gl_NumWorkGroups */
>       for (unsigned i = 0; i < 3; i++) {
> @@ -3644,18 +3639,10 @@ fs_visitor::get_nir_ssbo_intrinsic_index(const 
> brw::fs_builder &bld,
>       unsigned index = stage_prog_data->binding_table.ssbo_start +
>                        nir_src_as_uint(instr->src[src]);
>       surf_index = brw_imm_ud(index);
> -      brw_mark_surface_used(prog_data, index);
>    } else {
>       surf_index = vgrf(glsl_type::uint_type);
>       bld.ADD(surf_index, get_nir_src(instr->src[src]),
>               brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
> -
> -      /* Assume this may touch any UBO. It would be nice to provide
> -       * a tighter bound, but the array information is already lowered away.
> -       */
> -      brw_mark_surface_used(prog_data,
> -                            stage_prog_data->binding_table.ssbo_start +
> -                            nir->info.num_ssbos - 1);
>    }
>
>    return surf_index;
> @@ -3941,7 +3928,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>          const unsigned index = stage_prog_data->binding_table.ubo_start +
>                                 nir_src_as_uint(instr->src[0]);
>          surf_index = brw_imm_ud(index);
> -         brw_mark_surface_used(prog_data, index);
>       } else {
>          /* The block index is not a constant. Evaluate the index expression
>           * per-channel and add the base UBO index; we have to select a value
> @@ -3951,13 +3937,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder 
> &bld, nir_intrinsic_instr *instr
>          bld.ADD(surf_index, get_nir_src(instr->src[0]),
>                  brw_imm_ud(stage_prog_data->binding_table.ubo_start));
>          surf_index = bld.emit_uniformize(surf_index);
> -
> -         /* Assume this may touch any UBO. It would be nice to provide
> -          * a tighter bound, but the array information is already lowered 
> away.
> -          */
> -         brw_mark_surface_used(prog_data,
> -                               stage_prog_data->binding_table.ubo_start +
> -                               nir->info.num_ubos - 1);
>       }
>
>       if (!nir_src_is_const(instr->src[1])) {
> @@ -4214,8 +4193,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>       ubld.ADD(buffer_size, size_aligned4, negate(size_padding));
>
>       bld.MOV(retype(dest, ret_payload.type), component(buffer_size, 0));
> -
> -      brw_mark_surface_used(prog_data, index);
>       break;
>    }
>
> @@ -4826,15 +4803,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
> nir_tex_instr *instr)
>          unreachable("should be lowered");
>
>       case nir_tex_src_texture_offset: {
> -         /* Figure out the highest possible texture index and mark it as 
> used */
> -         uint32_t max_used = texture + instr->texture_array_size - 1;
> -         if (instr->op == nir_texop_tg4 && devinfo->gen < 8) {
> -            max_used += stage_prog_data->binding_table.gather_texture_start;
> -         } else {
> -            max_used += stage_prog_data->binding_table.texture_start;
> -         }
> -         brw_mark_surface_used(prog_data, max_used);
> -
>          /* Emit code to evaluate the actual indexing expression */
>          fs_reg tmp = vgrf(glsl_type::uint_type);
>          bld.ADD(tmp, src, brw_imm_ud(texture));
> diff --git a/src/intel/compiler/brw_vec4_generator.cpp 
> b/src/intel/compiler/brw_vec4_generator.cpp
> index 888cb358fd1..93baaef3ab7 100644
> --- a/src/intel/compiler/brw_vec4_generator.cpp
> +++ b/src/intel/compiler/brw_vec4_generator.cpp
> @@ -291,8 +291,6 @@ generate_tex(struct brw_codegen *p,
>                  inst->header_size != 0,
>                  BRW_SAMPLER_SIMD_MODE_SIMD4X2,
>                  return_format);
> -
> -      brw_mark_surface_used(&prog_data->base, sampler + 
> base_binding_table_index);
>    } else {
>       /* Non-constant sampler index. */
>
> @@ -1351,8 +1349,6 @@ generate_get_buffer_size(struct brw_codegen *p,
>               inst->header_size > 0,
>               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
>               BRW_SAMPLER_RETURN_FORMAT_SINT32);
> -
> -   brw_mark_surface_used(&prog_data->base, surf_index.ud);
> }
>
> static void
> @@ -1378,9 +1374,6 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
>                                     0, /* LD message ignores sampler unit */
>                                     GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
>                                     BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0));
> -
> -      brw_mark_surface_used(&prog_data->base, surf_index.ud);
> -
>    } else {
>
>       struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
> @@ -1866,8 +1859,6 @@ generate_code(struct brw_codegen *p,
>       case SHADER_OPCODE_SHADER_TIME_ADD:
>          brw_shader_time_add(p, src[0],
>                              prog_data->base.binding_table.shader_time_start);
> -         brw_mark_surface_used(&prog_data->base,
> -                               
> prog_data->base.binding_table.shader_time_start);
>          break;
>
>       case SHADER_OPCODE_UNTYPED_ATOMIC:
> diff --git a/src/intel/compiler/brw_vec4_nir.cpp 
> b/src/intel/compiler/brw_vec4_nir.cpp
> index f8d4dfb4682..98632b5af08 100644
> --- a/src/intel/compiler/brw_vec4_nir.cpp
> +++ b/src/intel/compiler/brw_vec4_nir.cpp
> @@ -387,16 +387,11 @@ 
> vec4_visitor::get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr)
>       unsigned index = prog_data->base.binding_table.ssbo_start +
>                        nir_src_as_uint(instr->src[src]);
>       surf_index = brw_imm_ud(index);
> -      brw_mark_surface_used(&prog_data->base, index);
>    } else {
>       surf_index = src_reg(this, glsl_type::uint_type);
>       emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[src], 1),
>                brw_imm_ud(prog_data->base.binding_table.ssbo_start)));
>       surf_index = emit_uniformize(surf_index);
> -
> -      brw_mark_surface_used(&prog_data->base,
> -                            prog_data->base.binding_table.ssbo_start +
> -                            nir->info.num_ssbos - 1);
>    }
>
>    return surf_index;
> @@ -492,8 +487,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
> *instr)
>       emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
>
>       emit(inst);
> -
> -      brw_mark_surface_used(&prog_data->base, index);
>       break;
>    }
>
> @@ -700,7 +693,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
> *instr)
>          const unsigned index = prog_data->base.binding_table.ubo_start +
>                                 nir_src_as_uint(instr->src[0]);
>          surf_index = brw_imm_ud(index);
> -         brw_mark_surface_used(&prog_data->base, index);
>       } else {
>          /* The block index is not a constant. Evaluate the index expression
>           * per-channel and add the base UBO index; we have to select a value
> @@ -711,13 +703,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
> *instr)
>                                                    instr->num_components),
>                   brw_imm_ud(prog_data->base.binding_table.ubo_start)));
>          surf_index = emit_uniformize(surf_index);
> -
> -         /* Assume this may touch any UBO. It would be nice to provide
> -          * a tighter bound, but the array information is already lowered 
> away.
> -          */
> -         brw_mark_surface_used(&prog_data->base,
> -                               prog_data->base.binding_table.ubo_start +
> -                               nir->info.num_ubos - 1);
>       }
>
>       src_reg offset_reg;
> @@ -2031,20 +2016,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
>       }
>
>       case nir_tex_src_texture_offset: {
> -         /* The highest texture which may be used by this operation is
> -          * the last element of the array. Mark it here, because the generator
> -          * doesn't have enough information to determine the bound.
> -          */
> -         uint32_t array_size = instr->texture_array_size;
> -         uint32_t max_used = texture + array_size - 1;
> -         if (instr->op == nir_texop_tg4) {
> -            max_used += prog_data->base.binding_table.gather_texture_start;
> -         } else {
> -            max_used += prog_data->base.binding_table.texture_start;
> -         }
> -
> -         brw_mark_surface_used(&prog_data->base, max_used);
> -
>          /* Emit code to evaluate the actual indexing expression */
>          src_reg src = get_nir_src(instr->src[i].src, 1);
>          src_reg temp(this, glsl_type::uint_type);
> diff --git a/src/intel/compiler/brw_vec4_visitor.cpp 
> b/src/intel/compiler/brw_vec4_visitor.cpp
> index aea64c5b54d..a8ff7f4da25 100644
> --- a/src/intel/compiler/brw_vec4_visitor.cpp
> +++ b/src/intel/compiler/brw_vec4_visitor.cpp
> @@ -1748,8 +1748,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t 
> *block, vec4_instruction *inst,
>       src = byte_offset(src, 16);
>    }
>
> -   brw_mark_surface_used(&prog_data->base, index);
> -
>    if (is_64bit) {
>       temp = retype(temp, BRW_REGISTER_TYPE_DF);
>       shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst);
> diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 
> b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> index 66cdc1a10b6..256fdd8fc79 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> @@ -109,10 +109,6 @@ brw_setup_image_uniform_values(gl_shader_stage stage,
>                              image_idx,
>                              offsetof(brw_image_param, swizzling), 2);
>       param += BRW_IMAGE_PARAM_SIZE;
> -
> -      brw_mark_surface_used(
> -         stage_prog_data,
> -         stage_prog_data->binding_table.image_start + image_idx);
>    }
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
> b/src/mesa/drivers/dri/i965/brw_program.c
> index 730d6dc0d5a..beccd8b3588 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -835,7 +835,10 @@ brw_assign_common_binding_table_offsets(const struct 
> gen_device_info *devinfo,
>    stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
>    next_binding_table_offset += num_textures;
>
> -   /* prog_data->base.binding_table.size will be set by 
> brw_mark_surface_used. */
> +   /* Set the binding table size.  Some callers may append new entries
> +    * and increase this accordingly.
> +    */
> +   stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
>
>    assert(next_binding_table_offset <= BRW_MAX_SURFACES);
>    return next_binding_table_offset;
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index 536e47638e6..7bbb6166344 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -63,6 +63,9 @@ assign_fs_binding_table_offsets(const struct 
> gen_device_info *devinfo,
>          next_binding_table_offset;
>       next_binding_table_offset += key->nr_color_regions;
>    }
> +
> +   /* Update the binding table size */
> +   prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4;
> }
>
> static void
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev





More information about the mesa-dev mailing list