[Mesa-dev] [PATCH 03/15] i965/fs: Use MOV_INDIRECT for all indirect uniform loads
Matt Turner
mattst88 at gmail.com
Wed Dec 9 20:33:34 PST 2015
On Wed, Dec 9, 2015 at 8:23 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> Instead of using reladdr, this commit changes the FS backend to emit a
> MOV_INDIRECT whenever we need an indirect uniform load. We also have to
> rework some of the other bits of the backend to handle this new form of
> uniform load. The obvious change is that demote_pull_constants now acts
> more like a lowering pass when it hits a MOV_INDIRECT.
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 72 +++++++++++++++++++-------------
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 53 ++++++++++++++++++-----
> 2 files changed, 86 insertions(+), 39 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index bf446d2..7cc03c5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1945,8 +1945,8 @@ fs_visitor::assign_constant_locations()
> if (inst->src[i].file != UNIFORM)
> continue;
>
> - if (inst->src[i].reladdr) {
> - int uniform = inst->src[i].nr;
> + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
> + int uniform = inst->src[0].nr;
>
> /* If this array isn't already present in the pull constant buffer,
> * add it.
> @@ -2028,49 +2028,63 @@ fs_visitor::assign_constant_locations()
> void
> fs_visitor::demote_pull_constants()
> {
> - foreach_block_and_inst (block, fs_inst, inst, cfg) {
> + const unsigned index = stage_prog_data->binding_table.pull_constants_start;
> +
> + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
> + /* Set up the annotation tracking for new generated instructions. */
> + const fs_builder ibld(this, block, inst);
> +
> for (int i = 0; i < inst->sources; i++) {
> if (inst->src[i].file != UNIFORM)
> continue;
>
> - int pull_index;
> + /* We'll handle this case later */
> + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
> + continue;
> +
> unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
> - if (location >= uniforms) /* Out of bounds access */
> - pull_index = -1;
> - else
> - pull_index = pull_constant_loc[location];
> + if (location >= uniforms)
> + continue; /* Out of bounds access */
> +
> + int pull_index = pull_constant_loc[location];
>
> if (pull_index == -1)
> continue;
>
> - /* Set up the annotation tracking for new generated instructions. */
> - const fs_builder ibld(this, block, inst);
> - const unsigned index = stage_prog_data->binding_table.pull_constants_start;
> - fs_reg dst = vgrf(glsl_type::float_type);
> -
> assert(inst->src[i].stride == 0);
>
> - /* Generate a pull load into dst. */
> - if (inst->src[i].reladdr) {
> - VARYING_PULL_CONSTANT_LOAD(ibld, dst,
> - brw_imm_ud(index),
> - *inst->src[i].reladdr,
> - pull_index * 4);
> - inst->src[i].reladdr = NULL;
> - inst->src[i].stride = 1;
> - } else {
> - const fs_builder ubld = ibld.exec_all().group(8, 0);
> - struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
> - ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
> - dst, brw_imm_ud(index), offset);
> - inst->src[i].set_smear(pull_index & 3);
> - }
> - brw_mark_surface_used(prog_data, index);
> + fs_reg dst = vgrf(glsl_type::float_type);
> + const fs_builder ubld = ibld.exec_all().group(8, 0);
> + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
> + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
> + dst, brw_imm_ud(index), offset);
>
> /* Rewrite the instruction to use the temporary VGRF. */
> inst->src[i].file = VGRF;
> inst->src[i].nr = dst.nr;
> inst->src[i].reg_offset = 0;
> + inst->src[i].set_smear(pull_index & 3);
> +
> + brw_mark_surface_used(prog_data, index);
> + }
> +
> + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
> + inst->src[0].file == UNIFORM) {
> +
> + unsigned location = inst->src[0].nr + inst->src[0].reg_offset;
> + if (location >= uniforms)
> + continue; /* Out of bounds access */
> +
> + int pull_index = pull_constant_loc[location];
> + assert(pull_index >= 0); /* This had better be pull */
> +
> + VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
> + brw_imm_ud(index),
> + inst->src[1],
> + pull_index * 4);
> + inst->remove(block);
> +
> + brw_mark_surface_used(prog_data, index);
> }
> }
> invalidate_live_intervals();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 15d9b1c..bf239c3 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1136,6 +1136,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
> {
> fs_reg image(UNIFORM, deref->var->data.driver_location / 4,
> BRW_REGISTER_TYPE_UD);
> + fs_reg indirect;
> + unsigned indirect_max = 0;
>
> for (const nir_deref *tail = &deref->deref; tail->child;
> tail = tail->child) {
> @@ -1147,7 +1149,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
> image = offset(image, bld, base * element_size);
>
> if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
> - fs_reg tmp = vgrf(glsl_type::int_type);
> + fs_reg tmp = vgrf(glsl_type::uint_type);
>
> if (devinfo->gen == 7 && !devinfo->is_haswell) {
> /* IVB hangs when trying to access an invalid surface index with
> @@ -1166,14 +1168,29 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
> }
>
> bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4));
> - if (image.reladdr)
> - bld.ADD(*image.reladdr, *image.reladdr, tmp);
> - else
> - image.reladdr = new(mem_ctx) fs_reg(tmp);
> + if (indirect.file == BAD_FILE) {
> + indirect = tmp;
> + indirect_max = type_size_scalar(tail->type) - BRW_IMAGE_PARAM_SIZE;
> + } else {
> + bld.ADD(indirect, indirect, tmp);
> + }
> }
> }
>
> - return image;
> + if (indirect.file == BAD_FILE) {
> + return image;
> + } else {
> + /* Emit a pile of MOVs to load the uniform into a temporary. The
> + * dead-code elimination pass will get rid of what we don't use.
> + */
> + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
> + for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
> + bld.emit(SHADER_OPCODE_MOV_INDIRECT,
> + offset(tmp, bld, j), offset(image, bld, j),
> + indirect, brw_imm_ud((indirect_max + 1) * 4));
> + }
> + return tmp;
> + }
> }
>
> void
> @@ -2302,12 +2319,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
> /* Offsets are in bytes but they should always be multiples of 4 */
> assert(const_offset->u[0] % 4 == 0);
> src.reg_offset = const_offset->u[0] / 4;
> +
> + for (unsigned j = 0; j < instr->num_components; j++) {
> + bld.MOV(offset(dest, bld, j), offset(src, bld, j));
> + }
> } else {
> - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> - }
> + fs_reg indirect = retype(get_nir_src(instr->src[0]),
> + BRW_REGISTER_TYPE_UD);
>
> - for (unsigned j = 0; j < instr->num_components; j++) {
> - bld.MOV(offset(dest, bld, j), offset(src, bld, j));
> + /* Se need to pass a size to the MOV_INDIRECT but we don't want it to
s/Se/We/
More information about the mesa-dev
mailing list