[Mesa-dev] [PATCH v2] i965/skl: Always use a header for SIMD4x2 sampler messages
Kristian Høgsberg
krh at bitplanet.net
Wed Jan 7 22:43:25 PST 2015
SKL+ overloads the SIMD4x2 SIMD mode to mean either SIMD8D or SIMD4x2
depending on bit 22 in the message header. If the bit is 0 or there is
no header we get SIMD8D. We always want SIMD4x2 in vec4 and for fs pull
constants, so use a message header in those cases and set bit 22 there.
Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
---
src/mesa/drivers/dri/i965/brw_defines.h | 5 ++++
src/mesa/drivers/dri/i965/brw_fs.cpp | 8 ++++++
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 32 +++++++++++++++++++-----
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 ++++++++---
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++-
5 files changed, 53 insertions(+), 11 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 28e398d..f02a0b8 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1373,6 +1373,11 @@ enum brw_message_target {
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+/* GEN9 changes SIMD mode 0 to mean SIMD8D, but lets us get the SIMD4x2
+ * behavior by setting bit 22 of dword 2 in the message header. */
+#define GEN9_SAMPLER_SIMD_MODE_SIMD8D 0
+#define GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2 (1 << 22)
+
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8c7d780..9dfb7b7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2994,6 +2994,14 @@ fs_visitor::lower_uniform_pull_constant_loads()
const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
fs_reg payload = fs_reg(this, glsl_type::uint_type);
+ /* We have to use a message header on Skylake to get SIMD4x2 mode.
+ * Reserve space for the register.
+ */
+ if (brw->gen >= 9) {
+ payload.reg_offset++;
+ virtual_grf_sizes[payload.reg] = 2;
+ }
+
/* This is actually going to be a MOV, but since only the first dword
* is accessed, we have a special opcode to do just that one. Note
* that this needs to be an operation that will be considered a def
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c652d65..7b4ac8d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1017,6 +1017,26 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
*/
dst.width = BRW_WIDTH_4;
+ struct brw_reg src = offset;
+ bool header_present = false;
+ int mlen = 1;
+
+ if (brw->gen >= 9) {
+ /* Skylake requires a message header in order to use SIMD4x2 mode. */
+ src = retype(brw_vec8_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
+ mlen = 2;
+ header_present = true;
+
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, src, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+ brw_MOV(p, get_element_ud(src, 2),
+ brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2));
+ brw_pop_insn_state(p);
+ }
+
if (index.file == BRW_IMMEDIATE_VALUE) {
uint32_t surf_index = index.dw1.ud;
@@ -1028,14 +1048,14 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
brw_pop_insn_state(p);
brw_set_dest(p, send, dst);
- brw_set_src0(p, send, offset);
+ brw_set_src0(p, send, src);
brw_set_sampler_message(p, send,
surf_index,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
- 1, /* mlen */
- false, /* no header */
+ mlen,
+ header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
@@ -1064,8 +1084,8 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
0 /* sampler */,
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1 /* rlen */,
- 1 /* mlen */,
- false /* header */,
+ mlen,
+ header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
@@ -1077,7 +1097,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
/* dst = send(offset, a0.0) */
brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn_send, dst);
- brw_set_src0(p, insn_send, offset);
+ brw_set_src0(p, insn_send, src);
brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);
brw_pop_insn_state(p);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index b88a579..19e82ef 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -328,6 +328,7 @@ vec4_generator::generate_tex(vec4_instruction *inst,
} else {
struct brw_reg header =
retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
+ uint32_t dw2 = 0;
/* Explicitly set up the message header by copying g0 to the MRF. */
brw_push_insn_state(p);
@@ -336,11 +337,17 @@ vec4_generator::generate_tex(vec4_instruction *inst,
brw_set_default_access_mode(p, BRW_ALIGN_1);
- if (inst->offset) {
+ if (inst->offset)
/* Set the texel offset bits in DWord 2. */
- brw_MOV(p, get_element_ud(header, 2),
- brw_imm_ud(inst->offset));
- }
+ dw2 = inst->offset;
+
+ if (brw->gen >= 9)
+ /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D,
+ * based on bit 22 in the header. */
+ dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2;
+
+ if (dw2)
+ brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(dw2));
brw_adjust_sampler_state_pointer(p, header, sampler_index, dst);
brw_pop_insn_state(p);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 09d79c8..a81c66a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2586,12 +2586,14 @@ vec4_visitor::visit(ir_texture *ir)
/* The message header is necessary for:
* - Gen4 (always)
+ * - Gen9+ for selecting SIMD4x2
* - Texel offsets
* - Gather channel selection
* - Sampler indices too large to fit in a 4-bit value.
*/
inst->header_present =
- brw->gen < 5 || inst->offset != 0 || ir->op == ir_tg4 ||
+ brw->gen < 5 || brw->gen >= 9 ||
+ inst->offset != 0 || ir->op == ir_tg4 ||
is_high_sampler(brw, sampler_reg);
inst->base_mrf = 2;
inst->mlen = inst->header_present + 1; /* always at least one */
--
2.1.0
More information about the mesa-dev
mailing list