[Mesa-dev] [PATCH 24/41] i965/fs: Add an exec_size field to fs_inst
Jason Ekstrand
jason at jlekstrand.net
Sat Sep 20 10:23:13 PDT 2014
This will, eventually, allow us to manage execution sizes of instructions
in a much more natural way from the fs_visitor level.
Signed-off-by: Jason Ekstrand <jason.ekstrand at intel.com>
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 69 +++++++++++++++++++++++-----
src/mesa/drivers/dri/i965/brw_fs.h | 18 ++++++--
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
3 files changed, 74 insertions(+), 15 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index af7e1c9..f6c9b46 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -53,7 +53,8 @@ extern "C" {
#include "glsl/glsl_types.h"
void
-fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources)
+fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
+ fs_reg *src, int sources)
{
memset(this, 0, sizeof(*this));
@@ -61,6 +62,33 @@ fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources)
this->dst = dst;
this->src = src;
this->sources = sources;
+ this->exec_size = exec_size;
+
+ assert(dst.file != IMM && dst.file != UNIFORM);
+
+ /* If exec_size == 0, try to guess it from the registers. Since all
+ * manner of things may use hardware registers, we first try to guess
+ * based on GRF registers. If this fails, we will go ahead and take the
+ * width from the destination register.
+ */
+ if (this->exec_size == 0) {
+ if (dst.file == GRF) {
+ this->exec_size = dst.width;
+ } else {
+ for (int i = 0; i < sources; ++i) {
+ if (src[i].file != GRF)
+ continue;
+
+ if (this->exec_size <= 1)
+ this->exec_size = src[i].width;
+ assert(src[i].width == 1 || src[i].width == this->exec_size);
+ }
+ }
+
+ if (this->exec_size == 0 && dst.file != BAD_FILE)
+ this->exec_size = dst.width;
+ }
+ assert(this->exec_size != 0);
this->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -86,17 +114,29 @@ fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources)
this->writes_accumulator = false;
}
+fs_inst::fs_inst()
+{
+ fs_reg *src = ralloc_array(this, fs_reg, 3);
+ init(BRW_OPCODE_NOP, 8, dst, src, 0);
+}
+
+fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size)
+{
+ fs_reg *src = ralloc_array(this, fs_reg, 3);
+ init(opcode, exec_size, reg_undef, src, 0);
+}
+
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst)
{
fs_reg *src = ralloc_array(this, fs_reg, 3);
- init(opcode, dst, src, 0);
+ init(opcode, 0, dst, src, 0);
}
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
{
fs_reg *src = ralloc_array(this, fs_reg, 3);
src[0] = src0;
- init(opcode, dst, src, 1);
+ init(opcode, 0, dst, src, 1);
}
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
@@ -105,7 +145,7 @@ fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
fs_reg *src = ralloc_array(this, fs_reg, 3);
src[0] = src0;
src[1] = src1;
- init(opcode, dst, src, 2);
+ init(opcode, 0, dst, src, 2);
}
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
@@ -115,12 +155,18 @@ fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
src[0] = src0;
src[1] = src1;
src[2] = src2;
- init(opcode, dst, src, 3);
+ init(opcode, 0, dst, src, 3);
}
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, fs_reg src[], int sources)
{
- init(opcode, dst, src, sources);
+ init(opcode, 0, dst, src, sources);
+}
+
+fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
+ fs_reg src[], int sources)
+{
+ init(opcode, exec_width, dst, src, sources);
}
fs_inst::fs_inst(const fs_inst &that)
@@ -208,7 +254,7 @@ ALU2(MAC)
fs_inst *
fs_visitor::IF(enum brw_predicate predicate)
{
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF);
+ fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width);
inst->predicate = predicate;
return inst;
}
@@ -219,7 +265,7 @@ fs_visitor::IF(const fs_reg &src0, const fs_reg &src1,
enum brw_conditional_mod condition)
{
assert(brw->gen == 6);
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF,
+ fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width,
reg_null_d, src0, src1);
inst->conditional_mod = condition;
return inst;
@@ -382,6 +428,7 @@ fs_inst::equals(fs_inst *inst) const
eot == inst->eot &&
header_present == inst->header_present &&
shadow_compare == inst->shadow_compare &&
+ exec_size == inst->exec_size &&
offset == inst->offset);
}
@@ -762,7 +809,7 @@ fs_visitor::no16(const char *format, ...)
fs_inst *
fs_visitor::emit(enum opcode opcode)
{
- return emit(new(mem_ctx) fs_inst(opcode));
+ return emit(new(mem_ctx) fs_inst(opcode, dispatch_width));
}
fs_inst *
@@ -2120,7 +2167,7 @@ fs_visitor::demote_pull_constants()
} else {
fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
fs_inst *pull =
- new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
dst, surf_index, offset);
inst->insert_before(block, pull);
inst->src[i].set_smear(pull_index & 3);
@@ -2838,7 +2885,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
* by live variable analysis, or register allocation will explode.
*/
fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET,
- payload, const_offset_reg);
+ 8, payload, const_offset_reg);
setup->force_writemask_all = true;
setup->ir = inst->ir;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 4b90a2a..bb98717 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -193,18 +193,23 @@ public:
class fs_inst : public backend_instruction {
fs_inst &operator=(const fs_inst &);
+ void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
+ fs_reg *src, int sources);
+
public:
DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
- void init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources);
-
- fs_inst(enum opcode opcode = BRW_OPCODE_NOP, const fs_reg &dst = reg_undef);
+ fs_inst();
+ fs_inst(enum opcode opcode, uint8_t exec_size);
+ fs_inst(enum opcode opcode, const fs_reg &dst);
fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
const fs_reg &src1);
fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
const fs_reg &src1, const fs_reg &src2);
fs_inst(enum opcode opcode, const fs_reg &dst, fs_reg src[], int sources);
+ fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
+ fs_reg src[], int sources);
fs_inst(const fs_inst &that);
void resize_sources(uint8_t num_sources);
@@ -224,6 +229,13 @@ public:
uint8_t sources; /**< Number of fs_reg sources. */
+ /**
+ * Execution size of the instruction. This is used by the generator to
+ * generate the correct binary for the given fs_inst. Current valid
+ * values are 1, 8, 16.
+ */
+ uint8_t exec_size;
+
/* Chooses which flag subregister (f0.0 or f0.1) is used for conditional
* mod and predication.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 779b3a2..d519a3a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -939,7 +939,7 @@ fs_visitor::visit(ir_expression *ir)
packed_consts.type = result.type;
fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
- emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
packed_consts, surf_index, const_offset_reg));
for (int i = 0; i < ir->type->vector_elements; i++) {
--
2.1.0
More information about the mesa-dev mailing list