[Mesa-dev] [RFC 5/7] i965/fs: Implement the new nir_scratch_load/store opcodes
Jason Ekstrand
jason at jlekstrand.net
Mon Dec 5 19:59:56 UTC 2016
This uses either the normal scratch read/write messages that we use for
spilling (when the offset is a constant) or the DWORD scattered read/write
messages (when the offset is indirect).
---
src/mesa/drivers/dri/i965/brw_defines.h | 2 +
src/mesa/drivers/dri/i965/brw_fs.cpp | 111 ++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 30 ++++++
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +-
src/mesa/drivers/dri/i965/brw_shader.cpp | 5 +
5 files changed, 150 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 1fd0b94..0a96285 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1065,6 +1065,8 @@ enum opcode {
SHADER_OPCODE_GEN7_SCRATCH_READ,
SHADER_OPCODE_DWORD_SCATTERED_READ,
SHADER_OPCODE_DWORD_SCATTERED_WRITE,
+ SHADER_OPCODE_SCRATCH_READ_LOGICAL,
+ SHADER_OPCODE_SCRATCH_WRITE_LOGICAL,
/**
* Gen8+ SIMD8 URB Read messages.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8d47638..594f1f4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3571,6 +3571,112 @@ fs_visitor::lower_minmax()
}
static void
+lower_scratch_logical_send(const fs_builder &bld, fs_inst *inst)
+{
+ const gen_device_info *devinfo = bld.shader->devinfo;
+ const fs_reg &offset = inst->src[0];
+
+ /* Lower a SHADER_OPCODE_SCRATCH_{READ,WRITE}_LOGICAL instruction in place.
+ * src[0] is the offset and, for writes, src[1] is the value to store.
+ * Immediate offsets are rewritten to the GEN4/GEN7 scratch read/write
+ * opcodes; any other offset is rewritten to a DWORD scattered read/write
+ * whose payload is assembled with LOAD_PAYLOAD.
+ *
+ * The offsets provided by NIR are in bytes but they do not take channels
+ * into account. We need to multiply by dispatch width.
+ */
+ const unsigned dispatch_width =
+ static_cast<const fs_visitor *>(bld.shader)->dispatch_width;
+
+ if (offset.file == IMM) {
+ /* The immediate versions are easy. Let's lower those first. */
+ if (inst->opcode == SHADER_OPCODE_SCRATCH_READ_LOGICAL) {
+ /* The Gen7 descriptor-based offset is 12 bits of HWORD units.
+ * Because the Gen7-style scratch block read is hardwired to BTI 255,
+ * on Gen9+ it would cause the DC to do an IA-coherent read, which
+ * largely outweighs the slight advantage from not having to provide
+ * the address as part of the message header, so we're better off
+ * using plain old oword block reads.
+ *
+ * Hence the Gen7-style read is only selected for Gen7 and Gen8, and
+ * only when the width-scaled byte offset is below
+ * (1 << 12) * REG_SIZE; everything else takes the Gen4-style read.
+ */
+ const unsigned byte_offset = offset.d * dispatch_width;
+ if (devinfo->gen >= 7 && devinfo->gen < 9 &&
+ byte_offset < (1 << 12) * REG_SIZE) {
+ inst->opcode = SHADER_OPCODE_GEN7_SCRATCH_READ;
+ } else {
+ inst->opcode = SHADER_OPCODE_GEN4_SCRATCH_READ;
+ inst->base_mrf = 13;
+ inst->mlen = 1; /* header contains offset */
+ }
+ inst->offset = byte_offset;
+ inst->sources = 0; /* the offset now lives in inst->offset */
+ return;
+ } else {
+ assert(inst->opcode == SHADER_OPCODE_SCRATCH_WRITE_LOGICAL);
+ const unsigned byte_offset = offset.d * dispatch_width;
+ inst->opcode = SHADER_OPCODE_GEN4_SCRATCH_WRITE;
+ inst->mlen = 1 + (dispatch_width / 8); /* header, value */
+ inst->base_mrf = 13;
+ inst->offset = byte_offset;
+ /* Move the "value" source to the right spot. Note that "offset" is a
+ * reference to src[0], so byte_offset had to be computed above, before
+ * this overwrite.
+ */
+ inst->src[0] = inst->src[1];
+ inst->sources = 1;
+ return;
+ }
+ }
+
+ if (inst->opcode == SHADER_OPCODE_SCRATCH_READ_LOGICAL) {
+ inst->opcode = SHADER_OPCODE_DWORD_SCATTERED_READ;
+ } else {
+ assert(inst->opcode == SHADER_OPCODE_SCRATCH_WRITE_LOGICAL);
+ inst->opcode = SHADER_OPCODE_DWORD_SCATTERED_WRITE;
+ }
+
+ fs_reg sources[3]; /* [0] g0 copy, [1] per-channel offsets, [2] value (writes only) */
+
+ const unsigned base_offset = 0;
+ const fs_builder hbld = bld.exec_all().group(8, 0);
+ sources[0] = hbld.vgrf(BRW_REGISTER_TYPE_UD);
+ hbld.MOV(sources[0], retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* start from a copy of g0 */
+ hbld.group(1, 0).MOV(component(sources[0], 2), brw_imm_ud(base_offset)); /* component 2 <- base_offset; presumably the header's offset field -- confirm */
+
+ fs_reg channel_ids = bld.vgrf(BRW_REGISTER_TYPE_W);
+ bld.emit(SHADER_OPCODE_CHANNEL_IDS, channel_ids);
+
+ sources[1] = bld.vgrf(BRW_REGISTER_TYPE_D);
+ if (devinfo->gen < 6) {
+ /* On Gen < 6, the offsets are in bytes: scale the NIR byte offset by
+ * the dispatch width and add 4 bytes per channel id.
+ */
+ fs_reg bytes = bld.vgrf(BRW_REGISTER_TYPE_D);
+ bld.MUL(bytes, offset, brw_imm_d(dispatch_width));
+ fs_reg stagger = bld.vgrf(BRW_REGISTER_TYPE_D);
+ bld.MUL(stagger, channel_ids, brw_imm_d(4));
+ bld.ADD(sources[1], bytes, stagger);
+ } else {
+ /* On Gen >= 6, the offsets are in dwords: multiplying the byte offset
+ * by dispatch_width / 4 folds the width scaling and the bytes-to-dwords
+ * conversion into one MUL; the channel id is then added directly.
+ */
+ fs_reg dwords = bld.vgrf(BRW_REGISTER_TYPE_D);
+ bld.MUL(dwords, offset, brw_imm_d(dispatch_width / 4));
+ bld.ADD(sources[1], dwords, channel_ids);
+ }
+
+ unsigned num_sources;
+ if (inst->opcode == SHADER_OPCODE_DWORD_SCATTERED_WRITE) {
+ sources[2] = inst->src[1]; /* value */
+ num_sources = 3;
+ } else {
+ num_sources = 2;
+ }
+
+ fs_reg payload;
+ if (devinfo->gen >= 7) {
+ payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F); /* nr is a placeholder, assigned below */
+ fs_inst *load = bld.LOAD_PAYLOAD(payload, sources, num_sources, 1);
+ load->dst.nr = bld.shader->alloc.allocate(regs_written(load)); /* size the VGRF from the payload */
+ inst->src[0] = load->dst;
+ inst->mlen = regs_written(load);
+ inst->sources = 1;
+ } else {
+ payload = fs_reg(MRF, 13); /* same base MRF as the immediate paths above */
+ fs_inst *load = bld.LOAD_PAYLOAD(payload, sources, num_sources, 1);
+ inst->base_mrf = 13;
+ inst->mlen = regs_written(load);
+ inst->sources = 0;
+ }
+}
+
+static void
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
fs_reg *dst, fs_reg color, unsigned components)
{
@@ -4349,6 +4455,11 @@ fs_visitor::lower_logical_sends()
const fs_builder ibld(this, block, inst);
switch (inst->opcode) {
+ case SHADER_OPCODE_SCRATCH_READ_LOGICAL:
+ case SHADER_OPCODE_SCRATCH_WRITE_LOGICAL:
+ lower_scratch_logical_send(ibld, inst);
+ break;
+
case FS_OPCODE_FB_WRITE_LOGICAL:
assert(stage == MESA_SHADER_FRAGMENT);
lower_fb_write_logical_send(ibld, inst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 9478bb8..ca4c7fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -39,6 +39,7 @@ fs_visitor::emit_nir_code()
nir_setup_outputs();
nir_setup_uniforms();
nir_emit_system_values();
+ last_scratch = nir->num_scratch * dispatch_width;
/* get the main function and emit it */
nir_foreach_function(function, nir) {
@@ -4316,6 +4317,35 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
+ case nir_intrinsic_load_scratch: {
+ const unsigned base_offset = nir_intrinsic_base(instr);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ fs_reg offset;
+ if (const_offset) {
+ offset = brw_imm_d(const_offset->i32[0] + base_offset);
+ } else {
+ offset = bld.vgrf(BRW_REGISTER_TYPE_D);
+ bld.ADD(offset, get_nir_src(instr->src[0]), brw_imm_d(base_offset));
+ }
+ bld.emit(SHADER_OPCODE_SCRATCH_READ_LOGICAL, dest, offset);
+ break;
+ }
+
+ case nir_intrinsic_store_scratch: {
+ const unsigned base_offset = nir_intrinsic_base(instr);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ fs_reg offset;
+ if (const_offset) {
+ offset = brw_imm_d(const_offset->i32[0] + base_offset);
+ } else {
+ offset = bld.vgrf(BRW_REGISTER_TYPE_D);
+ bld.ADD(offset, get_nir_src(instr->src[1]), brw_imm_d(base_offset));
+ }
+ bld.emit(SHADER_OPCODE_SCRATCH_WRITE_LOGICAL, bld.null_reg_f(),
+ offset, get_nir_src(instr->src[0]));
+ break;
+ }
+
default:
unreachable("unknown intrinsic");
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 5c6f3d4..03ee079 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -750,7 +750,7 @@ namespace {
}
}
-static void
+void
emit_unspill(const fs_builder &bld, fs_reg dst,
uint32_t spill_offset, unsigned count)
{
@@ -785,7 +785,7 @@ emit_unspill(const fs_builder &bld, fs_reg dst,
}
}
-static void
+void
emit_spill(const fs_builder &bld, fs_reg src,
uint32_t spill_offset, unsigned count)
{
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 79fbb96..8cfcbad 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -302,6 +302,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
return "dword_scattered_read";
case SHADER_OPCODE_DWORD_SCATTERED_WRITE:
return "dword_scattered_write";
+ case SHADER_OPCODE_SCRATCH_READ_LOGICAL:
+ return "scratch_read_logical";
+ case SHADER_OPCODE_SCRATCH_WRITE_LOGICAL:
+ return "scratch_write_logical";
case SHADER_OPCODE_URB_WRITE_SIMD8:
return "gen8_urb_write_simd8";
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
@@ -1010,6 +1014,7 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
case SHADER_OPCODE_DWORD_SCATTERED_WRITE:
+ case SHADER_OPCODE_SCRATCH_WRITE_LOGICAL:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC:
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list