[Mesa-dev] [PATCH 5/9] i965/fs: Expose arbitrary pull constant load sizes to the IR.
Francisco Jerez
currojerez at riseup.net
Fri Dec 9 19:03:28 UTC 2016
Change the FS generator to ask the dataport for enough owords worth of
constants to fill the execution size of the instruction, which means
that the visitor now needs to set the execution size correctly for
uniform pull constant load instructions -- something we were kind of
neglecting until now.
---
src/mesa/drivers/dri/i965/brw_eu_emit.c | 15 +++++++-------
src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 ++++++++++++--------------
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 +++++----
4 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 6141bfb..8536a13 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p,
}
/**
- * Read a float[4] vector from the data port constant cache.
+ * Read float[4] vectors from the data port constant cache.
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
*/
@@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p,
const unsigned target_cache =
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
BRW_DATAPORT_READ_TARGET_DATA_CACHE);
+ const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
/* On newer hardware, offset is in units of owords. */
if (devinfo->gen >= 6)
@@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p,
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_push_insn_state(p);
+ brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
@@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p,
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
+ brw_pop_insn_state(p);
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
@@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p,
brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
}
- brw_set_dp_read_message(p,
- insn,
- bind_table_index,
- BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ brw_set_dp_read_message(p, insn, bind_table_index,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
target_cache,
1, /* msg_length */
true, /* header_present */
- 1); /* response_length (1 reg, 2 owords!) */
+ DIV_ROUND_UP(exec_size, 8)); /* response_length */
brw_pop_insn_state(p);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 819d256..b6a571a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads()
assert(inst->src[i].stride == 0);
- const fs_builder ubld = ibld.exec_all().group(8, 0);
+ const fs_builder ubld = ibld.exec_all().group(4, 0);
struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
dst, brw_imm_ud(index), offset);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 24bec5f..e73f2ca 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
struct brw_reg index,
struct brw_reg offset)
{
+ assert(type_sz(dst.type) == 4);
assert(inst->mlen != 0);
assert(index.file == BRW_IMMEDIATE_VALUE &&
@@ -1149,27 +1150,25 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
{
assert(index.type == BRW_REGISTER_TYPE_UD);
assert(payload.file == BRW_GENERAL_REGISTER_FILE);
+ assert(type_sz(dst.type) == 4);
if (index.file == BRW_IMMEDIATE_VALUE) {
const uint32_t surf_index = index.ud;
brw_push_insn_state(p);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
- brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4);
brw_pop_insn_state(p);
- brw_set_dest(p, send, vec4(retype(dst, BRW_REGISTER_TYPE_UD)));
- brw_set_src0(p, send, vec4(retype(payload, BRW_REGISTER_TYPE_UD)));
- brw_set_dp_read_message(p, send,
- surf_index,
- BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
+ brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
+ brw_set_dp_read_message(p, send, surf_index,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
GEN6_SFID_DATAPORT_CONSTANT_CACHE,
1, /* mlen */
true, /* header */
- 1); /* rlen */
+ DIV_ROUND_UP(inst->size_written, REG_SIZE));
} else {
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1188,17 +1187,15 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
/* dst = send(payload, a0.0 | <descriptor>) */
brw_inst *insn = brw_send_indirect_message(
p, GEN6_SFID_DATAPORT_CONSTANT_CACHE,
- vec4(retype(dst, BRW_REGISTER_TYPE_UD)),
- vec4(retype(payload, BRW_REGISTER_TYPE_UD)), addr);
- brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
- brw_set_dp_read_message(p, insn,
- 0, /* surface */
- BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ retype(dst, BRW_REGISTER_TYPE_UD),
+ retype(payload, BRW_REGISTER_TYPE_UD), addr);
+ brw_set_dp_read_message(p, insn, 0 /* surface */,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
GEN6_SFID_DATAPORT_CONSTANT_CACHE,
1, /* mlen */
true, /* header */
- 1); /* rlen */
+ DIV_ROUND_UP(inst->size_written, REG_SIZE));
brw_pop_insn_state(p);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 855266f..7e00086 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4059,7 +4059,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* and we have to split it if necessary.
*/
const unsigned type_size = type_sz(dest.type);
- const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F);
+ const fs_builder ubld = bld.exec_all().group(4, 0);
+ const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F);
+
for (unsigned c = 0; c < instr->num_components;) {
const unsigned base = const_offset->u32[0] + c * type_size;
@@ -4067,9 +4069,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
const unsigned count = MIN2(instr->num_components - c,
(16 - base % 16) / type_size);
- bld.exec_all()
- .emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts, surf_index, brw_imm_ud(base & ~15));
+ ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ packed_consts, surf_index, brw_imm_ud(base & ~15));
const fs_reg consts =
retype(byte_offset(packed_consts, base & 15), dest.type);
--
2.10.2
More information about the mesa-dev
mailing list