[Mesa-dev] [PATCH 06/21] i965/fs: Implement non-coherent framebuffer fetch using the sampler unit.
Kenneth Graunke
kenneth at whitecape.org
Thu Jul 28 07:38:13 UTC 2016
On Friday, July 22, 2016 8:59:00 PM PDT Francisco Jerez wrote:
> ---
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 89 ++++++++++++++++++++++++++++++++
> 1 file changed, 89 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 2872b2d..f5f918d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -3155,6 +3155,95 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
> }
> }
>
> +/**
> + * Fetch the current render target layer index.
> + */
> +static fs_reg
> +fetch_render_target_array_index(const fs_builder &bld)
> +{
> + if (bld.shader->devinfo->gen >= 6) {
> + /* The render target array index is provided in the thread payload as
> + * bits 26:16 of r0.0.
> + */
> + const fs_reg idx = bld.vgrf(BRW_REGISTER_TYPE_UD);
> + bld.AND(idx, brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 0, 1),
> + brw_imm_uw(0x7ff));
> + return idx;
> + } else {
> + /* Pre-SNB we only ever render into the first layer of the framebuffer
> + * since layered rendering is not implemented.
> + */
> + return brw_imm_ud(0);
> + }
> +}
> +
> +/**
> + * Fake non-coherent framebuffer read implemented using TXF to fetch from the
> + * framebuffer at the current fragment coordinates and sample index.
> + */
> +static fs_inst *
> +emit_non_coherent_fb_read(fs_visitor *v, const fs_reg &dst, unsigned target)
> +{
> + const fs_builder &bld = v->bld;
I'd rather see this as a class method, for now. You're passing in the
fs_visitor anyway, and accessing 4 fields (bld, key, pixel_x, pixel_y)
and calling 2 methods (emit_sampleid_setup and emit_mcs_fetch). So it
feels a bit awkward to make it a separate function.
We definitely want to clean up fs_visitor in the future...but we may as
well be consistent for now.
> + const struct brw_device_info *devinfo = bld.shader->devinfo;
> +
> + assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
> + const brw_wm_prog_key *wm_key =
> + reinterpret_cast<const brw_wm_prog_key *>(v->key);
> + assert(!wm_key->coherent_fb_fetch);
> + const brw_wm_prog_data *wm_prog_data =
> + reinterpret_cast<const brw_wm_prog_data *>(bld.shader->stage_prog_data);
> +
> + /* Calculate the surface index relative to the start of the texture binding
> + * table block, since that's what the texturing messages expect.
> + */
> + const unsigned surface = target +
> + wm_prog_data->binding_table.render_target_read_start -
> + wm_prog_data->base.binding_table.texture_start;
> +
> + brw_mark_surface_used(
> + bld.shader->stage_prog_data,
> + wm_prog_data->binding_table.render_target_read_start + target);
> +
> + /* Calculate the fragment coordinates. */
> + const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
> + bld.MOV(offset(coords, bld, 0), v->pixel_x);
> + bld.MOV(offset(coords, bld, 1), v->pixel_y);
> + bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld));
Maybe use LOAD_PAYLOAD here?
fs_reg *coord_components[3] =
{ pixel_x, pixel_y, fetch_render_target_array_index(bld) };
bld.LOAD_PAYLOAD(coords, coord_components, 3, 0);
> + /* Calculate the sample index and MCS payload when multisampling. Luckily
> + * the MCS fetch message behaves deterministically for UMS surfaces, so it
> + * shouldn't be necessary to recompile based on whether the framebuffer is
> + * CMS or UMS.
> + */
> + const fs_reg sample = wm_key->multisample_fbo ?
> + *v->emit_sampleid_setup() : fs_reg();
If gl_SampleID is used in the shader, this will emit a second, redundant
copy of the sample ID setup code, which is non-trivial. Hopefully it'll
be cleaned up, but I think it'd be better to reuse the existing copy.
if (wm_key->multisample_fbo &&
nir_system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
nir_system_values[SYSTEM_VALUE_SAMPLE_ID] = *emit_sampleid_setup();
const fs_reg sample = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
> + const fs_reg mcs = wm_key->multisample_fbo ?
> + v->emit_mcs_fetch(coords, 3, brw_imm_ud(surface)) : fs_reg();
> +
> + /* Use either a normal or a CMS texel fetch message depending on whether
> + * the framebuffer is single or multisample. On SKL+ use the wide CMS
> + * message just in case the framebuffer uses 16x multisampling, it should
> + * be equivalent to the normal CMS fetch for lower multisampling modes.
> + */
> + const opcode op = (!wm_key->multisample_fbo ? SHADER_OPCODE_TXF_LOGICAL :
> + devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W_LOGICAL :
> + SHADER_OPCODE_TXF_CMS_LOGICAL);
If you're going to use nested ternaries, I'd appreciate parenthesis
around the nested expression. The outer ones don't seem useful to me.
Specifically, I would prefer:
const opcode op = !wm_key->multisample_fbo ? SHADER_OPCODE_TXF_LOGICAL :
(devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W_LOGICAL :
SHADER_OPCODE_TXF_CMS_LOGICAL);
> +
> + /* Emit the instruction. */
> + const fs_reg srcs[] = { coords, fs_reg(), brw_imm_ud(0), fs_reg(),
> + sample, mcs,
> + brw_imm_ud(surface), brw_imm_ud(0),
> + fs_reg(), brw_imm_ud(3), brw_imm_ud(0) };
> + STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS);
> +
> + fs_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
> + inst->regs_written = 4 * inst->dst.component_size(inst->exec_size) /
> + REG_SIZE;
> +
> + return inst;
> +}
> +
> void
> fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr)
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160728/67fa53cd/attachment.sig>
More information about the mesa-dev
mailing list