[Mesa-dev] [PATCH 08/12] i965/fs: Implement lowering of logical texturing opcodes on Gen4.
Jason Ekstrand
jason at jlekstrand.net
Tue Jul 21 15:24:24 PDT 2015
I *think* this is sane. I compared it against the current SIMD16 and
SIMD8 code and it looks like it does the right thing. However, it's
not a direct translation and I don't know the gen4 texturing code, so
I can't really call it reviewed.
Acked-by: Jason Ekstrand <jason.ekstrand at intel.com>
Ken, could you give it a quick look?
On Sat, Jul 18, 2015 at 7:34 AM, Francisco Jerez <currojerez at riseup.net> wrote:
> Unlike its Gen5 and Gen7 counterparts, this patch isn't a plain
> refactor of the previous Gen4 texturing code; it's more of a rewrite
> largely based on emit_texture_gen4_simd16(). The reason is that on
> the one hand the original emit_texture_gen4() code didn't seem easily
> fixable to be SIMD width-invariant and had plenty of clutter to
> support SIMD-width workarounds which are no longer required. On the
> other hand emit_texture_gen4_simd16() was missing a number of
> SIMD8-only opcodes. This should generalize both and roughly match
> their current behaviour where there is overlap.
>
> Incidentally this will fix the following piglits on Gen4:
>
> arb_shader_texture_lod.execution.arb_shader_texture_lod-texgrad
> arb_shader_texture_lod.execution.tex-miplevel-selection *gradarb 2d
> arb_shader_texture_lod.execution.tex-miplevel-selection *gradarb 3d
> arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 2d
> arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 2d_projvec4
> arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 3d
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 108 ++++++++++++++++++++++++++++++++++-
> 1 file changed, 107 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 5233ac3..043d9e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -3370,6 +3370,110 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
> }
>
> static void
> +lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
> + const fs_reg &coordinate,
> + const fs_reg &shadow_c,
> + const fs_reg &lod, const fs_reg &lod2,
> + const fs_reg &sampler,
> + unsigned coord_components,
> + unsigned grad_components)
> +{
> + const bool has_lod = (op == SHADER_OPCODE_TXL || op == FS_OPCODE_TXB ||
> + op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS);
> + fs_reg msg_begin(MRF, 1, BRW_REGISTER_TYPE_F);
> + fs_reg msg_end = msg_begin;
> +
> + /* g0 header. */
> + msg_end = offset(msg_end, bld.group(8, 0), 1);
> +
> + for (unsigned i = 0; i < coord_components; i++)
> + bld.MOV(retype(offset(msg_end, bld, i), coordinate.type),
> + offset(coordinate, bld, i));
> +
> + msg_end = offset(msg_end, bld, coord_components);
> +
> + /* Messages other than SAMPLE and RESINFO in SIMD16 and TXD in SIMD8
> + * require all three components to be present and zero if they are unused.
> + */
> + if (coord_components > 0 &&
> + (has_lod || shadow_c.file != BAD_FILE ||
> + (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) {
> + for (unsigned i = coord_components; i < 3; i++)
> + bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f));
> +
> + msg_end = offset(msg_end, bld, 3 - coord_components);
> + }
> +
> + if (op == SHADER_OPCODE_TXD) {
> + /* TXD unsupported in SIMD16 mode. */
> + assert(bld.dispatch_width() == 8);
> +
> + /* the slots for u and v are always present, but r is optional */
> + if (coord_components < 2)
> + msg_end = offset(msg_end, bld, 2 - coord_components);
> +
> + /* P = u, v, r
> + * dPdx = dudx, dvdx, drdx
> + * dPdy = dudy, dvdy, drdy
> + *
> + * 1-arg: Does not exist.
> + *
> + * 2-arg: dudx dvdx dudy dvdy
> + * dPdx.x dPdx.y dPdy.x dPdy.y
> + * m4 m5 m6 m7
> + *
> + * 3-arg: dudx dvdx drdx dudy dvdy drdy
> + * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
> + * m5 m6 m7 m8 m9 m10
> + */
> + for (unsigned i = 0; i < grad_components; i++)
> + bld.MOV(offset(msg_end, bld, i), offset(lod, bld, i));
> +
> + msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
> +
> + for (unsigned i = 0; i < grad_components; i++)
> + bld.MOV(offset(msg_end, bld, i), offset(lod2, bld, i));
> +
> + msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
> + }
> +
> + if (has_lod) {
> + /* Bias/LOD with shadow comparitor is unsupported in SIMD16 -- *Without*
> + * shadow comparitor (including RESINFO) it's unsupported in SIMD8 mode.
> + */
> + assert(shadow_c.file != BAD_FILE ? bld.dispatch_width() == 8 :
> + bld.dispatch_width() == 16);
> +
> + const brw_reg_type type =
> + (op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS ?
> + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
> + bld.MOV(retype(msg_end, type), lod);
> + msg_end = offset(msg_end, bld, 1);
> + }
> +
> + if (shadow_c.file != BAD_FILE) {
> + if (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8) {
> + /* There's no plain shadow compare message, so we use shadow
> + * compare with a bias of 0.0.
> + */
> + bld.MOV(msg_end, fs_reg(0.0f));
> + msg_end = offset(msg_end, bld, 1);
> + }
> +
> + bld.MOV(msg_end, shadow_c);
> + msg_end = offset(msg_end, bld, 1);
> + }
> +
> + inst->opcode = op;
> + inst->src[0] = reg_undef;
> + inst->src[1] = sampler;
> + inst->resize_sources(2);
> + inst->base_mrf = msg_begin.reg;
> + inst->mlen = msg_end.reg - msg_begin.reg;
> + inst->header_size = 1;
> +}
> +
> +static void
> lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
> fs_reg coordinate,
> const fs_reg &shadow_c,
> @@ -3708,7 +3812,9 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
> sampler, offset_value,
> coord_components, grad_components);
> } else {
> - assert(!"Not implemented");
> + lower_sampler_logical_send_gen4(bld, inst, op, coordinate,
> + shadow_c, lod, lod2, sampler,
> + coord_components, grad_components);
> }
> }
>
> --
> 2.4.3
>
More information about the mesa-dev mailing list