[Mesa-dev] [PATCH 19/21] i965/fs: Add an opcode for loading indirect push constants
Jason Ekstrand
jason at jlekstrand.net
Wed Aug 19 22:45:54 PDT 2015
This commit adds an FS_OPCODE_PUSH_CONSTANT_LOAD opcode which allows you to
load an indirect push constant. The first argument to the opcode is a
non-indirect uniform, the second is an immediate base offset, the third is
the indirect, and the fourth is an immediate value that provides a bound on
the total offset. This way we can provide accurate regs_read() information
to optimization passes and things that need to think about interference.
---
src/mesa/drivers/dri/i965/brw_defines.h | 17 +++++++
src/mesa/drivers/dri/i965/brw_fs.cpp | 23 +++++++++
src/mesa/drivers/dri/i965/brw_fs.h | 4 ++
src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 1 +
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 70 ++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +
6 files changed, 117 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 82a3635..f7f0a2e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1029,6 +1029,23 @@ enum opcode {
FS_OPCODE_LINTERP,
FS_OPCODE_PIXEL_X,
FS_OPCODE_PIXEL_Y,
+
+ /**
+ * Loads a uniform push constant with an indirect. This opcode takes four
+ * arguments:
+ *
+ * 0) The uniform register to load from; it must have a NULL reladdr
+ * 1) An immediate base offset (in bytes)
+ * 2) A register indirect offset (in bytes)
+ * 3) The immediate value representing the maximum possible total offset.
+ *
+ * The base offset and indirect offset are added together to get the
+ * total offset which is then added to the starting address of the register
+ * in src0. The reason for the multiplicity of arguments is so that the
+ * range [reg, reg + regs_read()) is an accurate representation of all of
+ * the values that could be read by the instruction.
+ */
+ FS_OPCODE_PUSH_CONSTANT_LOAD,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3d55dc8..60c9a0f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -796,6 +796,25 @@ fs_inst::regs_read(int arg) const
case CS_OPCODE_CS_TERMINATE:
return 1;
+ case FS_OPCODE_PUSH_CONSTANT_LOAD:
+ if (arg == 0) {
+ assert(src[3].file == IMM);
+ unsigned max_indirect = src[3].fixed_hw_reg.dw1.ud;
+
+ if (src[0].file == UNIFORM) {
+ return (max_indirect / 4) + 1;
+ } else {
+ /* This is the case after assign_curb_setup() */
+ assert(src[0].file == HW_REG);
+
+ struct brw_reg reg = src[0].fixed_hw_reg;
+ unsigned base_offset = reg.nr * REG_SIZE + reg.subnr;
+ unsigned max_offset = base_offset + max_indirect;
+ return (max_offset / REG_SIZE) - (base_offset / REG_SIZE) + 1;
+ }
+ }
+ break;
+
default:
if (is_tex() && arg == 0 && src[0].file == GRF)
return mlen;
@@ -4233,6 +4252,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
return 8;
+ case FS_OPCODE_PUSH_CONSTANT_LOAD:
+ /* Prior to BDW, we only have 8 address registers */
+ return devinfo->gen < 8 ? 8 : inst->exec_size;
+
default:
return inst->exec_size;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 90c9756..6bc434a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -444,6 +444,10 @@ private:
void generate_scratch_write(fs_inst *inst, struct brw_reg src);
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
+ void generate_push_constant_load(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg reg,
+ struct brw_reg base_offset,
+ struct brw_reg indirect);
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index c7628dc..cdc6c10 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -71,6 +71,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case BRW_OPCODE_PLN:
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
+ case FS_OPCODE_PUSH_CONSTANT_LOAD:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c86ca04..956bfb8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1031,6 +1031,72 @@ fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst)
}
void
+fs_generator::generate_push_constant_load(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg reg,
+ struct brw_reg base_offset_reg,
+ struct brw_reg indirect)
+{
+ assert(base_offset_reg.file == BRW_IMMEDIATE_VALUE);
+ unsigned base_offset = base_offset_reg.dw1.ud;
+
+ /* Add in the register position to get the absolute offset */
+ base_offset += reg.nr * REG_SIZE + reg.subnr;
+
+ assert(indirect.type == BRW_REGISTER_TYPE_D ||
+ indirect.type == BRW_REGISTER_TYPE_UD);
+
+ if (indirect.file == BRW_IMMEDIATE_VALUE) {
+ base_offset += indirect.dw1.d;
+
+ reg.nr = base_offset / REG_SIZE;
+ reg.subnr = base_offset % REG_SIZE;
+ brw_MOV(p, dst, reg);
+ } else {
+ struct brw_reg addr = vec8(brw_address_reg(0));
+
+ /* The destination stride of an instruction (in bytes) must be greater
+ * than or equal to the size of the rest of the instruction. Since the
+ * address register is of type UW, we can't use a D-type instruction.
+ * In order to get around this, we re-type to UW and use a stride.
+ */
+ indirect = spread(indirect, 2);
+ indirect.type = BRW_REGISTER_TYPE_UW;
+
+ if (devinfo->gen < 8) {
+ /* Prior to Broadwell, there are a couple silly restrictions that
+ * we have to work around. First, we only have 8 address register
+ * entries so this is SIMD8-only.
+ */
+ assert(inst->exec_size <= 8);
+
+ /* Finally, the bottom 5 bits of the base offset and the bottom 5
+ * bits of the indirect must add to less than 32. In other words,
+ * the hardware needs to be able to add the bottom five bits of the
+ * two to get the subnumber and add the next 7 bits of each to get
+ * the actual register number. Since uniforms frequently cross
+ * register boundaries, this makes it almost useless. We could try
+ * and do something clever where we use an actual base offset if
+ * base_offset % 32 == 0 but that would mean we were generating
+ * different code depending on the base offset. Instead, for the
+ * sake of consistency, we'll just do the add ourselves.
+ */
+ brw_ADD(p, addr, indirect, brw_imm_uw(base_offset));
+ base_offset = 0;
+ } else {
+ /* On Broadwell and above, we have 16 address registers and
+ * everything seems to "just work".
+ */
+ brw_MOV(p, addr, indirect);
+ }
+
+ /* Get a VxH indirect for a0.0. */
+ struct brw_reg src = brw_VxH_indirect(0, base_offset);
+
+ brw_MOV(p, dst, retype(src, dst.type));
+ }
+}
+
+void
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
@@ -1951,6 +2017,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_urb_write(inst, src[0]);
break;
+ case FS_OPCODE_PUSH_CONSTANT_LOAD:
+ generate_push_constant_load(inst, dst, src[0], src[1], src[2]);
+ break;
+
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index a7453fa..fdbcca5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -695,6 +695,8 @@ brw_instruction_name(enum opcode op)
case FS_OPCODE_PIXEL_Y:
return "pixel_y";
+ case FS_OPCODE_PUSH_CONSTANT_LOAD:
+ return "push_const";
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return "uniform_pull_const";
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
--
2.4.3
More information about the mesa-dev
mailing list