[Mesa-dev] [PATCH 4/6] intel/eu: Plumb header present bit to codegen helpers for HDC messages.
Francisco Jerez
currojerez at riseup.net
Tue Feb 27 21:38:26 UTC 2018
This makes sure that the header-present bit of the message descriptor
is in sync with the header_size field of the IR instruction, which
gives the optimizer more control to avoid the overhead of setting up a
message header whenever one is not required.
---
src/intel/compiler/brw_eu.h | 18 ++++++++++++------
src/intel/compiler/brw_eu_emit.c | 30 ++++++++++++++++++------------
src/intel/compiler/brw_fs_generator.cpp | 20 ++++++++++++++------
src/intel/compiler/brw_vec4_generator.cpp | 11 ++++++-----
4 files changed, 50 insertions(+), 29 deletions(-)
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 2d0f56f7938..a5f28d8fc65 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -444,7 +444,8 @@ brw_untyped_atomic(struct brw_codegen *p,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
- bool response_expected);
+ bool response_expected,
+ bool header_present);
void
brw_untyped_surface_read(struct brw_codegen *p,
@@ -459,7 +460,8 @@ brw_untyped_surface_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned num_channels);
+ unsigned num_channels,
+ bool header_present);
void
brw_typed_atomic(struct brw_codegen *p,
@@ -468,7 +470,8 @@ brw_typed_atomic(struct brw_codegen *p,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
- bool response_expected);
+ bool response_expected,
+ bool header_present);
void
brw_typed_surface_read(struct brw_codegen *p,
@@ -476,14 +479,16 @@ brw_typed_surface_read(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned num_channels);
+ unsigned num_channels,
+ bool header_present);
void
brw_typed_surface_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned num_channels);
+ unsigned num_channels,
+ bool header_present);
void
brw_byte_scattered_read(struct brw_codegen *p,
@@ -498,7 +503,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned bit_size);
+ unsigned bit_size,
+ bool header_present);
void
brw_memory_fence(struct brw_codegen *p,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 9fc6d12f288..9529a30d27e 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -2877,7 +2877,8 @@ brw_untyped_atomic(struct brw_codegen *p,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
- bool response_expected)
+ bool response_expected,
+ bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -2895,7 +2896,7 @@ brw_untyped_atomic(struct brw_codegen *p,
p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
brw_surface_payload_size(p, response_expected,
devinfo->gen >= 8 || devinfo->is_haswell, true),
- align1);
+ header_present);
brw_set_dp_untyped_atomic_message(
p, insn, atomic_op, response_expected);
@@ -2978,7 +2979,8 @@ brw_untyped_surface_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned num_channels)
+ unsigned num_channels,
+ bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -2990,7 +2992,7 @@ brw_untyped_surface_write(struct brw_codegen *p,
WRITEMASK_X : WRITEMASK_XYZW;
struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, brw_writemask(brw_null_reg(), mask),
- payload, surface, msg_length, 0, align1);
+ payload, surface, msg_length, 0, header_present);
brw_set_dp_untyped_surface_write_message(
p, insn, num_channels);
@@ -3048,7 +3050,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned bit_size)
+ unsigned bit_size,
+ bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
assert(devinfo->gen > 7 || devinfo->is_haswell);
@@ -3057,7 +3060,7 @@ brw_byte_scattered_write(struct brw_codegen *p,
struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, brw_writemask(brw_null_reg(), WRITEMASK_XYZW),
- payload, surface, msg_length, 0, true);
+ payload, surface, msg_length, 0, header_present);
unsigned msg_control =
brw_byte_scattered_data_element_from_bit_size(bit_size) << 2;
@@ -3113,7 +3116,8 @@ brw_typed_atomic(struct brw_codegen *p,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
- bool response_expected) {
+ bool response_expected,
+ bool header_present) {
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
@@ -3125,7 +3129,7 @@ brw_typed_atomic(struct brw_codegen *p,
p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
brw_surface_payload_size(p, response_expected,
devinfo->gen >= 8 || devinfo->is_haswell, false),
- true);
+ header_present);
brw_set_dp_typed_atomic_message(
p, insn, atomic_op, response_expected);
@@ -3169,7 +3173,8 @@ brw_typed_surface_read(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned num_channels)
+ unsigned num_channels,
+ bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -3179,7 +3184,7 @@ brw_typed_surface_read(struct brw_codegen *p,
p, sfid, dst, payload, surface, msg_length,
brw_surface_payload_size(p, num_channels,
devinfo->gen >= 8 || devinfo->is_haswell, false),
- true);
+ header_present);
brw_set_dp_typed_surface_read_message(
p, insn, num_channels);
@@ -3223,7 +3228,8 @@ brw_typed_surface_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned num_channels)
+ unsigned num_channels,
+ bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -3235,7 +3241,7 @@ brw_typed_surface_write(struct brw_codegen *p,
WRITEMASK_X : WRITEMASK_XYZW);
struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, brw_writemask(brw_null_reg(), mask),
- payload, surface, msg_length, 0, true);
+ payload, surface, msg_length, 0, header_present);
brw_set_dp_typed_surface_write_message(
p, insn, num_channels);
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 1aed7fb850f..b85173881c9 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2069,10 +2069,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
- inst->mlen, !inst->dst.is_null());
+ inst->mlen, !inst->dst.is_null(),
+ inst->header_size);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1],
inst->mlen, src[2].ud);
@@ -2081,10 +2083,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1],
- inst->mlen, src[2].ud);
+ inst->mlen, src[2].ud,
+ inst->header_size);
break;
case SHADER_OPCODE_BYTE_SCATTERED_READ:
+ assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_byte_scattered_read(p, dst, src[0], src[1],
inst->mlen, src[2].ud);
@@ -2093,24 +2097,28 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_byte_scattered_write(p, src[0], src[1],
- inst->mlen, src[2].ud);
+ inst->mlen, src[2].ud,
+ inst->header_size);
break;
case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_atomic(p, dst, src[0], src[1],
- src[2].ud, inst->mlen, !inst->dst.is_null());
+ src[2].ud, inst->mlen, !inst->dst.is_null(),
+ inst->header_size);
break;
case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1],
- inst->mlen, src[2].ud);
+ inst->mlen, src[2].ud,
+ inst->header_size);
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud);
+ brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud,
+ inst->header_size);
break;
case SHADER_OPCODE_MEMORY_FENCE:
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 6fa6e35b24a..ecf9ed0ba3a 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1869,10 +1869,11 @@ generate_code(struct brw_codegen *p,
case SHADER_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
- !inst->dst.is_null());
+ !inst->dst.is_null(), inst->header_size);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
src[2].ud);
@@ -1881,25 +1882,25 @@ generate_code(struct brw_codegen *p,
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
- src[2].ud);
+ src[2].ud, inst->header_size);
break;
case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
- !inst->dst.is_null());
+ !inst->dst.is_null(), inst->header_size);
break;
case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen,
- src[2].ud);
+ src[2].ud, inst->header_size);
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_write(p, src[0], src[1], inst->mlen,
- src[2].ud);
+ src[2].ud, inst->header_size);
break;
case SHADER_OPCODE_MEMORY_FENCE:
--
2.16.1
More information about the mesa-dev
mailing list