[Mesa-dev] [PATCH 1/4] i965/fs: Merge nir_emit_texture and emit_texture
Kenneth Graunke
kenneth at whitecape.org
Thu May 5 00:20:39 UTC 2016
On Tuesday, May 3, 2016 3:00:24 PM PDT Jason Ekstrand wrote:
> The fs_visitor::emit_texture helper originated when we still had both NIR
> and IR visitors for the FS backend. Since the old visitor was removed,
> emit_texture serves no real purpose beyond arbitrarily splitting
> heavily-linked code across two functions.
> ---
> src/mesa/drivers/dri/i965/brw_fs.h | 18 ---
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 223 +++++++++++++++++++--------
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 159 -------------------
> 3 files changed, 162 insertions(+), 238 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/
brw_fs.h
> index a5c3297..925e4b7 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -200,21 +200,6 @@ public:
> void emit_interpolation_setup_gen4();
> void emit_interpolation_setup_gen6();
> void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
> - void emit_texture(ir_texture_opcode op,
> - const glsl_type *dest_type,
> - fs_reg coordinate, int components,
> - fs_reg shadow_c,
> - fs_reg lod, fs_reg dpdy, int grad_components,
> - fs_reg sample_index,
> - fs_reg offset,
> - fs_reg mcs,
> - int gather_component,
> - bool is_cube_array,
> - uint32_t surface,
> - fs_reg surface_reg,
> - uint32_t sampler,
> - fs_reg sampler_reg,
> - unsigned return_channels);
> fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
> const fs_reg &sampler);
> void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
> @@ -375,9 +360,6 @@ public:
> bool simd16_unsupported;
> char *no16_msg;
>
> - /* Result of last visit() method. Still used by emit_texture() */
> - fs_reg result;
> -
> /** Register numbers for thread payload fields. */
> struct thread_payload {
> uint8_t source_depth_reg;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/
dri/i965/brw_fs_nir.cpp
> index 360e2c9..ebc54ad 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -3068,65 +3068,61 @@ fs_visitor::nir_emit_texture(const fs_builder &bld,
nir_tex_instr *instr)
> {
> unsigned texture = instr->texture_index;
> unsigned sampler = instr->sampler_index;
> - fs_reg texture_reg(brw_imm_ud(texture));
> - fs_reg sampler_reg(brw_imm_ud(sampler));
>
> - int gather_component = instr->component;
> + fs_reg srcs[TEX_LOGICAL_NUM_SRCS];
>
> - bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
> - instr->is_array;
> + srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(texture);
> + srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(sampler);
>
> int lod_components = 0;
>
> - fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs,
tex_offset;
> -
> /* The hardware requires a LOD for buffer textures */
> if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
> - lod = brw_imm_d(0);
> + srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_d(0);
>
> for (unsigned i = 0; i < instr->num_srcs; i++) {
> fs_reg src = get_nir_src(instr->src[i].src);
> switch (instr->src[i].src_type) {
> case nir_tex_src_bias:
> - lod = retype(src, BRW_REGISTER_TYPE_F);
> + srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
> break;
> case nir_tex_src_comparitor:
> - shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
> + srcs[TEX_LOGICAL_SRC_SHADOW_C] = retype(src, BRW_REGISTER_TYPE_F);
> break;
> case nir_tex_src_coord:
> switch (instr->op) {
> case nir_texop_txf:
> case nir_texop_txf_ms:
> case nir_texop_samples_identical:
> - coordinate = retype(src, BRW_REGISTER_TYPE_D);
> + srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src,
BRW_REGISTER_TYPE_D);
> break;
> default:
> - coordinate = retype(src, BRW_REGISTER_TYPE_F);
> + srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src,
BRW_REGISTER_TYPE_F);
> break;
> }
> break;
> case nir_tex_src_ddx:
> - lod = retype(src, BRW_REGISTER_TYPE_F);
> + srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
> lod_components = nir_tex_instr_src_size(instr, i);
> break;
> case nir_tex_src_ddy:
> - lod2 = retype(src, BRW_REGISTER_TYPE_F);
> + srcs[TEX_LOGICAL_SRC_LOD2] = retype(src, BRW_REGISTER_TYPE_F);
> break;
> case nir_tex_src_lod:
> switch (instr->op) {
> case nir_texop_txs:
> - lod = retype(src, BRW_REGISTER_TYPE_UD);
> + srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_UD);
> break;
> case nir_texop_txf:
> - lod = retype(src, BRW_REGISTER_TYPE_D);
> + srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_D);
> break;
> default:
> - lod = retype(src, BRW_REGISTER_TYPE_F);
> + srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
> break;
> }
> break;
> case nir_tex_src_ms_index:
> - sample_index = retype(src, BRW_REGISTER_TYPE_UD);
> + srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src,
BRW_REGISTER_TYPE_UD);
> break;
>
> case nir_tex_src_offset: {
> @@ -3135,9 +3131,10 @@ fs_visitor::nir_emit_texture(const fs_builder &bld,
nir_tex_instr *instr)
> if (const_offset) {
> unsigned header_bits = brw_texture_offset(const_offset->i32,
3);
> if (header_bits != 0)
> - tex_offset = brw_imm_ud(header_bits);
> + srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] =
brw_imm_ud(header_bits);
> } else {
> - tex_offset = retype(src, BRW_REGISTER_TYPE_D);
> + srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] =
> + retype(src, BRW_REGISTER_TYPE_D);
> }
> break;
> }
> @@ -3156,17 +3153,17 @@ fs_visitor::nir_emit_texture(const fs_builder &bld,
nir_tex_instr *instr)
> brw_mark_surface_used(prog_data, max_used);
>
> /* Emit code to evaluate the actual indexing expression */
> - texture_reg = vgrf(glsl_type::uint_type);
> - bld.ADD(texture_reg, src, brw_imm_ud(texture));
> - texture_reg = bld.emit_uniformize(texture_reg);
> + fs_reg tmp = vgrf(glsl_type::uint_type);
> + bld.ADD(tmp, src, brw_imm_ud(texture));
> + srcs[TEX_LOGICAL_SRC_SURFACE] = bld.emit_uniformize(tmp);
> break;
> }
>
> case nir_tex_src_sampler_offset: {
> /* Emit code to evaluate the actual indexing expression */
> - sampler_reg = vgrf(glsl_type::uint_type);
> - bld.ADD(sampler_reg, src, brw_imm_ud(sampler));
> - sampler_reg = bld.emit_uniformize(sampler_reg);
> + fs_reg tmp = vgrf(glsl_type::uint_type);
> + bld.ADD(tmp, src, brw_imm_ud(sampler));
> + srcs[TEX_LOGICAL_SRC_SAMPLER] = bld.emit_uniformize(tmp);
> break;
> }
>
> @@ -3179,38 +3176,92 @@ fs_visitor::nir_emit_texture(const fs_builder &bld,
nir_tex_instr *instr)
> instr->op == nir_texop_samples_identical) {
> if (devinfo->gen >= 7 &&
> key_tex->compressed_multisample_layout_mask & (1 << texture)) {
> - mcs = emit_mcs_fetch(coordinate, instr->coord_components,
texture_reg);
> + srcs[TEX_LOGICAL_SRC_MCS] =
> + emit_mcs_fetch(srcs[TEX_LOGICAL_SRC_COORDINATE],
> + instr->coord_components,
> + srcs[TEX_LOGICAL_SRC_SURFACE]);
> } else {
> - mcs = brw_imm_ud(0u);
> + srcs[TEX_LOGICAL_SRC_MCS] = brw_imm_ud(0u);
> }
> }
>
> - enum glsl_base_type dest_base_type =
> - brw_glsl_base_type_for_nir_type (instr->dest_type);
> + srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr-
>coord_components);
> + srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
> +
> + if (instr->op == nir_texop_query_levels) {
> + /* textureQueryLevels() is implemented in terms of TXS so we need to
> + * pass a valid LOD argument.
> + */
> + assert(srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE);
> + srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);
> + }
> +
> + if (instr->op == nir_texop_samples_identical) {
> + fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
> +
> + /* If mcs is an immediate value, it means there is no MCS. In that
case
> + * just return false.
> + */
> + if (srcs[TEX_LOGICAL_SRC_MCS].file == BRW_IMMEDIATE_VALUE) {
> + bld.MOV(dst, brw_imm_ud(0u));
> + } else if ((key_tex->msaa_16 & (1 << sampler))) {
> + fs_reg tmp = vgrf(glsl_type::uint_type);
> + bld.OR(tmp, srcs[TEX_LOGICAL_SRC_MCS],
> + offset(srcs[TEX_LOGICAL_SRC_MCS], bld, 1));
> + bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ);
> + } else {
> + bld.CMP(dst, srcs[TEX_LOGICAL_SRC_MCS], brw_imm_ud(0u),
> + BRW_CONDITIONAL_EQ);
> + }
>
> - const glsl_type *dest_type =
> - glsl_type::get_instance(dest_base_type,
nir_tex_instr_dest_size(instr),
> - 1);
> + return;
> + }
>
> - ir_texture_opcode op;
> + enum opcode opcode;
> switch (instr->op) {
> - case nir_texop_lod: op = ir_lod; break;
> - case nir_texop_query_levels: op = ir_query_levels; break;
> - case nir_texop_tex: op = ir_tex; break;
> - case nir_texop_tg4: op = ir_tg4; break;
> - case nir_texop_txb: op = ir_txb; break;
> - case nir_texop_txd: op = ir_txd; break;
> - case nir_texop_txf: op = ir_txf; break;
> - case nir_texop_txf_ms: op = ir_txf_ms; break;
> - case nir_texop_txl: op = ir_txl; break;
> - case nir_texop_txs: op = ir_txs; break;
> + case nir_texop_tex:
> + opcode = SHADER_OPCODE_TEX_LOGICAL;
> + break;
> + case nir_texop_txb:
> + opcode = FS_OPCODE_TXB_LOGICAL;
> + break;
> + case nir_texop_txl:
> + opcode = SHADER_OPCODE_TXL_LOGICAL;
> + break;
> + case nir_texop_txd:
> + opcode = SHADER_OPCODE_TXD_LOGICAL;
> + break;
> + case nir_texop_txf:
> + opcode = SHADER_OPCODE_TXF_LOGICAL;
> + break;
> + case nir_texop_txf_ms:
> + if ((key_tex->msaa_16 & (1 << sampler)))
> + opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
> + else
> + opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
> + break;
> + case nir_texop_query_levels:
> + case nir_texop_txs:
> + opcode = SHADER_OPCODE_TXS_LOGICAL;
> + break;
> + case nir_texop_lod:
> + opcode = SHADER_OPCODE_LOD_LOGICAL;
> + break;
> + case nir_texop_tg4:
> + if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != BAD_FILE &&
> + srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != IMM)
> + opcode = SHADER_OPCODE_TG4_OFFSET_LOGICAL;
> + else
> + opcode = SHADER_OPCODE_TG4_LOGICAL;
> + break;
> case nir_texop_texture_samples: {
> fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
>
> fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D, 4);
> fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, tmp,
> bld.vgrf(BRW_REGISTER_TYPE_D, 1),
> - texture_reg, texture_reg);
> + srcs[TEX_LOGICAL_SRC_SURFACE],
> + srcs[TEX_LOGICAL_SRC_SURFACE]);
> inst->mlen = 1;
> inst->header_size = 1;
> inst->base_mrf = -1;
> @@ -3220,33 +3271,83 @@ fs_visitor::nir_emit_texture(const fs_builder &bld,
nir_tex_instr *instr)
> bld.MOV(dst, tmp);
> return;
> }
> - case nir_texop_samples_identical: op = ir_samples_identical; break;
> default:
> unreachable("unknown texture opcode");
> }
>
> - unsigned num_components = nir_tex_instr_dest_size(instr);
> + fs_reg dst = bld.vgrf(brw_type_for_nir_type(instr->dest_type), 4);
> + fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
>
> - if (instr->dest.is_ssa) {
> - uint8_t write_mask = nir_ssa_def_components_read(&instr->dest.ssa);
> + const unsigned dest_size = nir_tex_instr_dest_size(instr);
> + if (devinfo->gen >= 9 &&
> + instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels) {
> + unsigned write_mask = instr->dest.is_ssa ?
> + nir_ssa_def_components_read(&instr->dest.ssa):
> + (1 << dest_size) - 1;
> assert(write_mask != 0); /* dead code should have been eliminated */
> - num_components = _mesa_fls(write_mask);
> + inst->regs_written = _mesa_fls(write_mask) * dispatch_width / 8;
> + } else {
> + inst->regs_written = 4 * dispatch_width / 8;
> + }
> +
> + if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
> + inst->shadow_compare = true;
> +
> + if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file == IMM)
> + inst->offset = srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].ud;
> +
> + if (instr->op == nir_texop_tg4) {
> + if (instr->component == 1 &&
> + key_tex->gather_channel_quirk_mask & (1 << texture)) {
> + /* gather4 sampler is broken for green channel on RG32F --
> + * we must ask for blue instead.
> + */
> + inst->offset |= 2 << 16;
> + } else {
> + inst->offset |= instr->component << 16;
> + }
> +
> + if (devinfo->gen == 6)
> + emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst);
> + }
> +
> + if (instr->op == nir_texop_query_levels) {
> + /* # levels is in .w */
> + dst = offset(dst, bld, 3);
> }
>
> - const bool can_reduce_return_length = devinfo->gen >= 9 &&
> - instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels;
> + bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
> + instr->is_array;
> +
> + /* fixup #layers for cube map arrays */
> + if (instr->op == nir_texop_txs && (devinfo->gen < 7 || is_cube_array)) {
> + fs_reg depth = offset(dst, bld, 2);
> + fs_reg fixed_depth = vgrf(glsl_type::int_type);
>
> - emit_texture(op, dest_type, coordinate, instr->coord_components,
> - shadow_comparitor, lod, lod2, lod_components, sample_index,
> - tex_offset, mcs, gather_component, is_cube_array,
> - texture, texture_reg, sampler, sampler_reg,
> - can_reduce_return_length ? num_components : 4);
> + if (is_cube_array) {
> + bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth,
brw_imm_d(6));
> + } else if (devinfo->gen < 7) {
> + /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
> + bld.emit_minmax(fixed_depth, depth, brw_imm_d(1),
BRW_CONDITIONAL_GE);
> + }
> +
> + fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst-
>regs_written);
> + int components = inst->regs_written / (inst->exec_size / 8);
> + for (int i = 0; i < components; i++) {
> + if (i == 2) {
> + fixed_payload[i] = fixed_depth;
> + } else {
> + fixed_payload[i] = offset(dst, bld, i);
> + }
> + }
> + bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0);
> + }
>
> - fs_reg dest = get_nir_dest(instr->dest);
> - dest.type = this->result.type;
> + fs_reg nir_dest = get_nir_dest(instr->dest);
> + nir_dest.type = dst.type;
> emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
> - dest, this->result),
> - (1 << num_components) - 1);
> + nir_dest, dst),
> + (1 << dest_size) - 1);
This misses one thing from my recent Skylake rlen reduction work:
instead of using (1 << dest_size) - 1, I used the mask from
nir_ssa_def_components_read() here. That way, if some of the channels
aren't actually used, we don't bother emitting MOVs for them.
I don't know that it's a big deal, but it seems easy enough to preserve.
With that fixed, the series is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160504/911e3246/attachment-0001.sig>
More information about the mesa-dev
mailing list