[Mesa-dev] [PATCH 07/21] intel/fs: Use the generic SEND opcode for surface messages
Jason Ekstrand
jason at jlekstrand.net
Sat Nov 17 02:47:35 UTC 2018
---
src/intel/compiler/brw_eu.h | 27 ---
src/intel/compiler/brw_eu_emit.c | 72 -------
src/intel/compiler/brw_fs.cpp | 181 +++++++++++++-----
src/intel/compiler/brw_fs_generator.cpp | 62 ------
.../compiler/brw_schedule_instructions.cpp | 65 +++++++
5 files changed, 193 insertions(+), 214 deletions(-)
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 3b15a4a82b7..2d8d138b993 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -787,17 +787,6 @@ brw_untyped_atomic(struct brw_codegen *p,
bool response_expected,
bool header_present);
-void
-brw_untyped_atomic_float(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned atomic_op,
- unsigned msg_length,
- bool response_expected,
- bool header_present);
-
-
void
brw_untyped_surface_read(struct brw_codegen *p,
struct brw_reg dst,
@@ -841,22 +830,6 @@ brw_typed_surface_write(struct brw_codegen *p,
unsigned num_channels,
bool header_present);
-void
-brw_byte_scattered_read(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned bit_size);
-
-void
-brw_byte_scattered_write(struct brw_codegen *p,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned bit_size,
- bool header_present);
-
void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index e1b668c70ee..42ed77d6b12 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -2774,35 +2774,6 @@ brw_untyped_atomic(struct brw_codegen *p,
payload, surface, desc);
}
-void
-brw_untyped_atomic_float(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned atomic_op,
- unsigned msg_length,
- bool response_expected,
- bool header_present)
-{
- const struct gen_device_info *devinfo = p->devinfo;
-
- assert(devinfo->gen >= 9);
- assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
-
- const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
- const unsigned exec_size = 1 << brw_get_default_exec_size(p);
- const unsigned response_length =
- brw_surface_payload_size(p, response_expected, exec_size);
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, response_length, header_present) |
- brw_dp_untyped_atomic_float_desc(devinfo, exec_size, atomic_op,
- response_expected);
-
- brw_send_indirect_surface_message(p, sfid,
- brw_writemask(dst, WRITEMASK_XYZW),
- payload, surface, desc);
-}
-
void
brw_untyped_surface_read(struct brw_codegen *p,
struct brw_reg dst,
@@ -2853,49 +2824,6 @@ brw_untyped_surface_write(struct brw_codegen *p,
payload, surface, desc);
}
-void
-brw_byte_scattered_read(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned bit_size)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- assert(devinfo->gen > 7 || devinfo->is_haswell);
- assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
- const unsigned exec_size = 1 << brw_get_default_exec_size(p);
- const unsigned response_length = brw_surface_payload_size(p, 1, exec_size);
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, response_length, false) |
- brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, false);
-
- brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE,
- dst, payload, surface, desc);
-}
-
-void
-brw_byte_scattered_write(struct brw_codegen *p,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned bit_size,
- bool header_present)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- assert(devinfo->gen > 7 || devinfo->is_haswell);
- assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
- const unsigned exec_size = 1 << brw_get_default_exec_size(p);
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, 0, header_present) |
- brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, true);
-
- brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE,
- brw_writemask(brw_null_reg(),
- WRITEMASK_XYZW),
- payload, surface, desc);
-}
-
void
brw_typed_atomic(struct brw_codegen *p,
struct brw_reg dst,
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 2f6f76ce1df..48009adaaa0 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4767,8 +4767,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask)
}
static void
-lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
- const fs_reg &sample_mask)
+lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
{
const gen_device_info *devinfo = bld.shader->devinfo;
@@ -4778,10 +4777,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &surface = inst->src[2];
const UNUSED fs_reg &dims = inst->src[3];
const fs_reg &arg = inst->src[4];
+ assert(arg.file == IMM);
/* Calculate the total number of components of the payload. */
const unsigned addr_sz = inst->components_read(0);
const unsigned src_sz = inst->components_read(1);
+
+ const bool is_typed_access =
+ inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL ||
+ inst->opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL ||
+ inst->opcode == SHADER_OPCODE_TYPED_ATOMIC_LOGICAL;
+
/* From the BDW PRM Volume 7, page 147:
*
* "For the Data Cache Data Port*, the header must be present for the
@@ -4792,10 +4798,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
* messages prior to Gen9, since we have to provide a header anyway. On
* Gen11+ the header has been removed so we can only use predication.
*/
- const unsigned header_sz = devinfo->gen < 9 &&
- (op == SHADER_OPCODE_TYPED_SURFACE_READ ||
- op == SHADER_OPCODE_TYPED_SURFACE_WRITE ||
- op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0;
+ const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0;
const unsigned sz = header_sz + addr_sz + src_sz;
/* Allocate space for the payload. */
@@ -4803,6 +4806,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
unsigned n = 0;
+ const bool has_side_effects = inst->has_side_effects();
+ fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() :
+ fs_reg(brw_imm_d(0xffff));
+
/* Construct the payload. */
if (header_sz)
components[n++] = emit_surface_header(bld, sample_mask);
@@ -4841,14 +4848,124 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
}
}
+ uint32_t sfid;
+ switch (inst->opcode) {
+ case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
+ case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+ /* Byte scattered opcodes go through the normal data cache */
+ sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+ break;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ /* Untyped Surface messages go through the data cache but the SFID value
+ * changed on Haswell.
+ */
+ sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
+ HSW_SFID_DATAPORT_DATA_CACHE_1 :
+ GEN7_SFID_DATAPORT_DATA_CACHE);
+ break;
+
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+ /* Typed surface messages go through the render cache on IVB and the
+ * data cache on HSW+.
+ */
+ sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
+ HSW_SFID_DATAPORT_DATA_CACHE_1 :
+ GEN6_SFID_DATAPORT_RENDER_CACHE);
+ break;
+
+ default:
+ unreachable("Unsupported surface opcode");
+ }
+
+ uint32_t desc;
+ switch (inst->opcode) {
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
+ arg.ud, /* num_channels */
+ false /* write */);
+ break;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+ desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
+ arg.ud, /* num_channels */
+ true /* write */);
+ break;
+
+ case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+ desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
+ arg.ud, /* bit_size */
+ false /* write */);
+ break;
+
+ case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
+ desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
+ arg.ud, /* bit_size */
+ true /* write */);
+ break;
+
+ case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size,
+ arg.ud, /* atomic_op */
+ !inst->dst.is_null());
+ break;
+
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size,
+ arg.ud, /* atomic_op */
+ !inst->dst.is_null());
+ break;
+
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group,
+ arg.ud, /* num_channels */
+ false /* write */);
+ break;
+
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+ desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group,
+ arg.ud, /* num_channels */
+ true /* write */);
+ break;
+
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+ desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group,
+ arg.ud, /* atomic_op */
+ !inst->dst.is_null());
+ break;
+
+ default:
+ unreachable("Unknown surface logical instruction");
+ }
+
/* Update the original instruction. */
- inst->opcode = op;
+ inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
inst->header_size = header_sz;
+ inst->send_has_side_effects = has_side_effects;
+
+ /* Set up SFID and descriptors */
+ inst->sfid = sfid;
+ inst->desc = desc;
+ if (surface.file == IMM) {
+ inst->desc |= surface.ud & 0xff;
+ inst->src[0] = brw_imm_ud(0);
+ } else {
+ const fs_builder ubld = bld.exec_all().group(1, 0);
+ fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld.AND(tmp, surface, brw_imm_ud(0xff));
+ inst->src[0] = component(tmp, 0);
+ }
+ inst->src[1] = brw_imm_ud(0); /* ex_desc */
+
+ /* Finally, the payload */
+ inst->src[2] = payload;
- inst->src[0] = payload;
- inst->src[1] = surface;
- inst->src[2] = arg;
inst->resize_sources(3);
delete[] components;
@@ -4992,57 +5109,15 @@ fs_visitor::lower_logical_sends()
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_UNTYPED_SURFACE_READ,
- fs_reg());
- break;
-
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
- ibld.sample_mask_reg());
- break;
-
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_BYTE_SCATTERED_READ,
- fs_reg());
- break;
-
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_BYTE_SCATTERED_WRITE,
- ibld.sample_mask_reg());
- break;
-
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_UNTYPED_ATOMIC,
- ibld.sample_mask_reg());
- break;
-
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
- ibld.sample_mask_reg());
- break;
-
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_TYPED_SURFACE_READ,
- brw_imm_d(0xffff));
- break;
-
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_TYPED_SURFACE_WRITE,
- ibld.sample_mask_reg());
- break;
-
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
- lower_surface_logical_send(ibld, inst,
- SHADER_OPCODE_TYPED_ATOMIC,
- ibld.sample_mask_reg());
+ lower_surface_logical_send(ibld, inst);
break;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index c1635456b90..1a6e57ae1a7 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2237,68 +2237,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_shader_time_add(inst, src[0], src[1], src[2]);
break;
- case SHADER_OPCODE_UNTYPED_ATOMIC:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
- inst->mlen, !inst->dst.is_null(),
- inst->header_size);
- break;
-
- case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud,
- inst->mlen, !inst->dst.is_null(),
- inst->header_size);
- break;
-
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- assert(!inst->header_size);
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_untyped_surface_read(p, dst, src[0], src[1],
- inst->mlen, src[2].ud);
- break;
-
- case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_untyped_surface_write(p, src[0], src[1],
- inst->mlen, src[2].ud,
- inst->header_size);
- break;
-
- case SHADER_OPCODE_BYTE_SCATTERED_READ:
- assert(!inst->header_size);
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_byte_scattered_read(p, dst, src[0], src[1],
- inst->mlen, src[2].ud);
- break;
-
- case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_byte_scattered_write(p, src[0], src[1],
- inst->mlen, src[2].ud,
- inst->header_size);
- break;
-
- case SHADER_OPCODE_TYPED_ATOMIC:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_atomic(p, dst, src[0], src[1],
- src[2].ud, inst->mlen, !inst->dst.is_null(),
- inst->header_size);
- break;
-
- case SHADER_OPCODE_TYPED_SURFACE_READ:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_surface_read(p, dst, src[0], src[1],
- inst->mlen, src[2].ud,
- inst->header_size);
- break;
-
- case SHADER_OPCODE_TYPED_SURFACE_WRITE:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud,
- inst->header_size);
- break;
-
case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst, BRW_OPCODE_SEND);
break;
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index 71b7ad59f7c..c21679754ee 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -416,6 +416,78 @@ schedule_node::set_latency_gen7(bool is_haswell)
case SHADER_OPCODE_SEND:
switch (inst->sfid) {
+ case GEN6_SFID_DATAPORT_RENDER_CACHE:
+ switch ((inst->desc >> 14) & 0x1f) {
+ case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE:
+ case GEN7_DATAPORT_RC_TYPED_SURFACE_READ:
+ /* See also SHADER_OPCODE_TYPED_SURFACE_READ */
+ assert(!is_haswell);
+ latency = 600;
+ break;
+
+ case GEN7_DATAPORT_RC_TYPED_ATOMIC_OP:
+ /* See also SHADER_OPCODE_TYPED_ATOMIC */
+ assert(!is_haswell);
+ latency = 14000;
+ break;
+
+ default:
+ unreachable("Unknown render cache message");
+ }
+ break;
+
+ case GEN7_SFID_DATAPORT_DATA_CACHE:
+ switch ((inst->desc >> 14) & 0x1f) {
+ case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ:
+ case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE:
+ /* The lowering code sends byte scattered messages with this SFID
+ * regardless of generation, so they must be decoded here. Reuse the
+ * HSW+ untyped surface read/write latency estimate for them.
+ */
+ latency = 300;
+ break;
+
+ case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ:
+ case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE:
+ /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
+ assert(!is_haswell);
+ latency = 600;
+ break;
+
+ case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
+ /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
+ assert(!is_haswell);
+ latency = 14000;
+ break;
+
+ default:
+ unreachable("Unknown data cache message");
+ }
+ break;
+
+ case HSW_SFID_DATAPORT_DATA_CACHE_1:
+ switch ((inst->desc >> 14) & 0x1f) {
+ case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
+ case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE:
+ case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ:
+ case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE:
+ /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
+ latency = 300;
+ break;
+
+ case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP:
+ case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2:
+ case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
+ case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP:
+ case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
+ /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
+ latency = 14000;
+ break;
+
+ default:
+ unreachable("Unknown data cache message");
+ }
+ break;
+
default:
unreachable("Unknown SFID");
}
--
2.19.1
More information about the mesa-dev
mailing list