[Mesa-dev] [PATCH 1/4] i965/fs: Merge nir_emit_texture and emit_texture

Kenneth Graunke kenneth at whitecape.org
Thu May 5 00:20:39 UTC 2016


On Tuesday, May 3, 2016 3:00:24 PM PDT Jason Ekstrand wrote:
> The fs_visitor::emit_texture helper originated when we still had both NIR
> and IR visitors for the FS backend.  Since the old visitor was removed,
> emit_texture serves no real purpose beyond arbitrarily splitting
> heavily-linked code across two functions.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h           |  18 ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp     | 223 +++++++++++++++++++--------
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 159 -------------------
>  3 files changed, 162 insertions(+), 238 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/
brw_fs.h
> index a5c3297..925e4b7 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -200,21 +200,6 @@ public:
>     void emit_interpolation_setup_gen4();
>     void emit_interpolation_setup_gen6();
>     void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
> -   void emit_texture(ir_texture_opcode op,
> -                     const glsl_type *dest_type,
> -                     fs_reg coordinate, int components,
> -                     fs_reg shadow_c,
> -                     fs_reg lod, fs_reg dpdy, int grad_components,
> -                     fs_reg sample_index,
> -                     fs_reg offset,
> -                     fs_reg mcs,
> -                     int gather_component,
> -                     bool is_cube_array,
> -                     uint32_t surface,
> -                     fs_reg surface_reg,
> -                     uint32_t sampler,
> -                     fs_reg sampler_reg,
> -                     unsigned return_channels);
>     fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
>                           const fs_reg &sampler);
>     void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
> @@ -375,9 +360,6 @@ public:
>     bool simd16_unsupported;
>     char *no16_msg;
>  
> -   /* Result of last visit() method. Still used by emit_texture() */
> -   fs_reg result;
> -
>     /** Register numbers for thread payload fields. */
>     struct thread_payload {
>        uint8_t source_depth_reg;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/
dri/i965/brw_fs_nir.cpp
> index 360e2c9..ebc54ad 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -3068,65 +3068,61 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
>  {
>     unsigned texture = instr->texture_index;
>     unsigned sampler = instr->sampler_index;
> -   fs_reg texture_reg(brw_imm_ud(texture));
> -   fs_reg sampler_reg(brw_imm_ud(sampler));
>  
> -   int gather_component = instr->component;
> +   fs_reg srcs[TEX_LOGICAL_NUM_SRCS];
>  
> -   bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
> -                        instr->is_array;
> +   srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(texture);
> +   srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(sampler);
>  
>     int lod_components = 0;
>  
> -   fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, 
tex_offset;
> -
>     /* The hardware requires a LOD for buffer textures */
>     if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
> -      lod = brw_imm_d(0);
> +      srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_d(0);
>  
>     for (unsigned i = 0; i < instr->num_srcs; i++) {
>        fs_reg src = get_nir_src(instr->src[i].src);
>        switch (instr->src[i].src_type) {
>        case nir_tex_src_bias:
> -         lod = retype(src, BRW_REGISTER_TYPE_F);
> +         srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
>           break;
>        case nir_tex_src_comparitor:
> -         shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
> +         srcs[TEX_LOGICAL_SRC_SHADOW_C] = retype(src, BRW_REGISTER_TYPE_F);
>           break;
>        case nir_tex_src_coord:
>           switch (instr->op) {
>           case nir_texop_txf:
>           case nir_texop_txf_ms:
>           case nir_texop_samples_identical:
> -            coordinate = retype(src, BRW_REGISTER_TYPE_D);
> +            srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, 
BRW_REGISTER_TYPE_D);
>              break;
>           default:
> -            coordinate = retype(src, BRW_REGISTER_TYPE_F);
> +            srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, 
BRW_REGISTER_TYPE_F);
>              break;
>           }
>           break;
>        case nir_tex_src_ddx:
> -         lod = retype(src, BRW_REGISTER_TYPE_F);
> +         srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
>           lod_components = nir_tex_instr_src_size(instr, i);
>           break;
>        case nir_tex_src_ddy:
> -         lod2 = retype(src, BRW_REGISTER_TYPE_F);
> +         srcs[TEX_LOGICAL_SRC_LOD2] = retype(src, BRW_REGISTER_TYPE_F);
>           break;
>        case nir_tex_src_lod:
>           switch (instr->op) {
>           case nir_texop_txs:
> -            lod = retype(src, BRW_REGISTER_TYPE_UD);
> +            srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_UD);
>              break;
>           case nir_texop_txf:
> -            lod = retype(src, BRW_REGISTER_TYPE_D);
> +            srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_D);
>              break;
>           default:
> -            lod = retype(src, BRW_REGISTER_TYPE_F);
> +            srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
>              break;
>           }
>           break;
>        case nir_tex_src_ms_index:
> -         sample_index = retype(src, BRW_REGISTER_TYPE_UD);
> +         srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, 
BRW_REGISTER_TYPE_UD);
>           break;
>  
>        case nir_tex_src_offset: {
> @@ -3135,9 +3131,10 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
>           if (const_offset) {
>              unsigned header_bits = brw_texture_offset(const_offset->i32, 
3);
>              if (header_bits != 0)
> -               tex_offset = brw_imm_ud(header_bits);
> +               srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] = 
brw_imm_ud(header_bits);
>           } else {
> -            tex_offset = retype(src, BRW_REGISTER_TYPE_D);
> +            srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] =
> +               retype(src, BRW_REGISTER_TYPE_D);
>           }
>           break;
>        }
> @@ -3156,17 +3153,17 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
>           brw_mark_surface_used(prog_data, max_used);
>  
>           /* Emit code to evaluate the actual indexing expression */
> -         texture_reg = vgrf(glsl_type::uint_type);
> -         bld.ADD(texture_reg, src, brw_imm_ud(texture));
> -         texture_reg = bld.emit_uniformize(texture_reg);
> +         fs_reg tmp = vgrf(glsl_type::uint_type);
> +         bld.ADD(tmp, src, brw_imm_ud(texture));
> +         srcs[TEX_LOGICAL_SRC_SURFACE] = bld.emit_uniformize(tmp);
>           break;
>        }
>  
>        case nir_tex_src_sampler_offset: {
>           /* Emit code to evaluate the actual indexing expression */
> -         sampler_reg = vgrf(glsl_type::uint_type);
> -         bld.ADD(sampler_reg, src, brw_imm_ud(sampler));
> -         sampler_reg = bld.emit_uniformize(sampler_reg);
> +         fs_reg tmp = vgrf(glsl_type::uint_type);
> +         bld.ADD(tmp, src, brw_imm_ud(sampler));
> +         srcs[TEX_LOGICAL_SRC_SAMPLER] = bld.emit_uniformize(tmp);
>           break;
>        }
>  
> @@ -3179,38 +3176,92 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
>         instr->op == nir_texop_samples_identical) {
>        if (devinfo->gen >= 7 &&
>            key_tex->compressed_multisample_layout_mask & (1 << texture)) {
> -         mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
texture_reg);
> +         srcs[TEX_LOGICAL_SRC_MCS] =
> +            emit_mcs_fetch(srcs[TEX_LOGICAL_SRC_COORDINATE],
> +                           instr->coord_components,
> +                           srcs[TEX_LOGICAL_SRC_SURFACE]);
>        } else {
> -         mcs = brw_imm_ud(0u);
> +         srcs[TEX_LOGICAL_SRC_MCS] = brw_imm_ud(0u);
>        }
>     }
>  
> -   enum glsl_base_type dest_base_type =
> -     brw_glsl_base_type_for_nir_type (instr->dest_type);
> +   srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components);
> +   srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
> +
> +   if (instr->op == nir_texop_query_levels) {
> +      /* textureQueryLevels() is implemented in terms of TXS so we need to
> +       * pass a valid LOD argument.
> +       */
> +      assert(srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE);
> +      srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);
> +   }
> +
> +   if (instr->op == nir_texop_samples_identical) {
> +      fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
> +
> +      /* If mcs is an immediate value, it means there is no MCS.  In that 
case
> +       * just return false.
> +       */
> +      if (srcs[TEX_LOGICAL_SRC_MCS].file == BRW_IMMEDIATE_VALUE) {
> +         bld.MOV(dst, brw_imm_ud(0u));
> +      } else if ((key_tex->msaa_16 & (1 << sampler))) {
> +         fs_reg tmp = vgrf(glsl_type::uint_type);
> +         bld.OR(tmp, srcs[TEX_LOGICAL_SRC_MCS],
> +                offset(srcs[TEX_LOGICAL_SRC_MCS], bld, 1));
> +         bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ);
> +      } else {
> +         bld.CMP(dst, srcs[TEX_LOGICAL_SRC_MCS], brw_imm_ud(0u),
> +                 BRW_CONDITIONAL_EQ);
> +      }
>  
> -   const glsl_type *dest_type =
> -      glsl_type::get_instance(dest_base_type, 
nir_tex_instr_dest_size(instr),
> -                              1);
> +      return;
> +   }
>  
> -   ir_texture_opcode op;
> +   enum opcode opcode;
>     switch (instr->op) {
> -   case nir_texop_lod: op = ir_lod; break;
> -   case nir_texop_query_levels: op = ir_query_levels; break;
> -   case nir_texop_tex: op = ir_tex; break;
> -   case nir_texop_tg4: op = ir_tg4; break;
> -   case nir_texop_txb: op = ir_txb; break;
> -   case nir_texop_txd: op = ir_txd; break;
> -   case nir_texop_txf: op = ir_txf; break;
> -   case nir_texop_txf_ms: op = ir_txf_ms; break;
> -   case nir_texop_txl: op = ir_txl; break;
> -   case nir_texop_txs: op = ir_txs; break;
> +   case nir_texop_tex:
> +      opcode = SHADER_OPCODE_TEX_LOGICAL;
> +      break;
> +   case nir_texop_txb:
> +      opcode = FS_OPCODE_TXB_LOGICAL;
> +      break;
> +   case nir_texop_txl:
> +      opcode = SHADER_OPCODE_TXL_LOGICAL;
> +      break;
> +   case nir_texop_txd:
> +      opcode = SHADER_OPCODE_TXD_LOGICAL;
> +      break;
> +   case nir_texop_txf:
> +      opcode = SHADER_OPCODE_TXF_LOGICAL;
> +      break;
> +   case nir_texop_txf_ms:
> +      if ((key_tex->msaa_16 & (1 << sampler)))
> +         opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
> +      else
> +         opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
> +      break;
> +   case nir_texop_query_levels:
> +   case nir_texop_txs:
> +      opcode = SHADER_OPCODE_TXS_LOGICAL;
> +      break;
> +   case nir_texop_lod:
> +      opcode = SHADER_OPCODE_LOD_LOGICAL;
> +      break;
> +   case nir_texop_tg4:
> +      if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != BAD_FILE &&
> +          srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != IMM)
> +         opcode = SHADER_OPCODE_TG4_OFFSET_LOGICAL;
> +      else
> +         opcode = SHADER_OPCODE_TG4_LOGICAL;
> +      break;
>     case nir_texop_texture_samples: {
>        fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
>  
>        fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D, 4);
>        fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, tmp,
>                                 bld.vgrf(BRW_REGISTER_TYPE_D, 1),
> -                               texture_reg, texture_reg);
> +                               srcs[TEX_LOGICAL_SRC_SURFACE],
> +                               srcs[TEX_LOGICAL_SRC_SURFACE]);
>        inst->mlen = 1;
>        inst->header_size = 1;
>        inst->base_mrf = -1;
> @@ -3220,33 +3271,83 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
>        bld.MOV(dst, tmp);
>        return;
>     }
> -   case nir_texop_samples_identical: op = ir_samples_identical; break;
>     default:
>        unreachable("unknown texture opcode");
>     }
>  
> -   unsigned num_components = nir_tex_instr_dest_size(instr);
> +   fs_reg dst = bld.vgrf(brw_type_for_nir_type(instr->dest_type), 4);
> +   fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
>  
> -   if (instr->dest.is_ssa) {
> -      uint8_t write_mask = nir_ssa_def_components_read(&instr->dest.ssa);
> +   const unsigned dest_size = nir_tex_instr_dest_size(instr);
> +   if (devinfo->gen >= 9 &&
> +       instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels) {
> +      unsigned write_mask = instr->dest.is_ssa ?
> +                            nir_ssa_def_components_read(&instr->dest.ssa):
> +                            (1 << dest_size) - 1;
>        assert(write_mask != 0); /* dead code should have been eliminated */
> -      num_components = _mesa_fls(write_mask);
> +      inst->regs_written = _mesa_fls(write_mask) * dispatch_width / 8;
> +   } else {
> +      inst->regs_written = 4 * dispatch_width / 8;
> +   }
> +
> +   if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
> +      inst->shadow_compare = true;
> +
> +   if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file == IMM)
> +      inst->offset = srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].ud;
> +
> +   if (instr->op == nir_texop_tg4) {
> +      if (instr->component == 1 &&
> +          key_tex->gather_channel_quirk_mask & (1 << texture)) {
> +         /* gather4 sampler is broken for green channel on RG32F --
> +          * we must ask for blue instead.
> +          */
> +         inst->offset |= 2 << 16;
> +      } else {
> +         inst->offset |= instr->component << 16;
> +      }
> +
> +      if (devinfo->gen == 6)
> +         emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst);
> +   }
> +
> +   if (instr->op == nir_texop_query_levels) {
> +      /* # levels is in .w */
> +      dst = offset(dst, bld, 3);
>     }
>  
> -   const bool can_reduce_return_length = devinfo->gen >= 9 &&
> -      instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels;
> +   bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
> +                        instr->is_array;
> +
> +   /* fixup #layers for cube map arrays */
> +   if (instr->op == nir_texop_txs && (devinfo->gen < 7 || is_cube_array)) {
> +      fs_reg depth = offset(dst, bld, 2);
> +      fs_reg fixed_depth = vgrf(glsl_type::int_type);
>  
> -   emit_texture(op, dest_type, coordinate, instr->coord_components,
> -                shadow_comparitor, lod, lod2, lod_components, sample_index,
> -                tex_offset, mcs, gather_component, is_cube_array,
> -                texture, texture_reg, sampler, sampler_reg,
> -                can_reduce_return_length ? num_components : 4);
> +      if (is_cube_array) {
> +         bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, 
brw_imm_d(6));
> +      } else if (devinfo->gen < 7) {
> +         /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
> +         bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), 
BRW_CONDITIONAL_GE);
> +      }
> +
> +      fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
> +      int components = inst->regs_written / (inst->exec_size / 8);
> +      for (int i = 0; i < components; i++) {
> +         if (i == 2) {
> +            fixed_payload[i] = fixed_depth;
> +         } else {
> +            fixed_payload[i] = offset(dst, bld, i);
> +         }
> +      }
> +      bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0);
> +   }
>  
> -   fs_reg dest = get_nir_dest(instr->dest);
> -   dest.type = this->result.type;
> +   fs_reg nir_dest = get_nir_dest(instr->dest);
> +   nir_dest.type = dst.type;
>     emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
> -                             dest, this->result),
> -                (1 << num_components) - 1);
> +                             nir_dest, dst),
> +                (1 << dest_size) - 1);

This misses one thing from my recent Skylake rlen reduction work:
instead of using (1 << dest_size) - 1, I used the mask from
nir_ssa_def_components_read() here.  That way, if some of the channels
aren't actually used, we don't bother emitting MOVs for them.

I don't know that it's a big deal, but it seems easy enough to preserve.

With that fixed, the series is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160504/911e3246/attachment-0001.sig>


More information about the mesa-dev mailing list