[Mesa-dev] [PATCH 05/14] i965: Add SIMD8 URB write low-level IR instruction
Kristian Høgsberg
krh at bitplanet.net
Tue Oct 28 15:17:48 PDT 2014
This is all we need from the generator for SIMD8 vertex shaders. This
opcode is just the send instruction; all the hard work will happen
in the visitor using LOAD_PAYLOAD.
Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
---
src/mesa/drivers/dri/i965/brw_defines.h | 1 +
src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++++
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 25 +++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 16 ++++++++++++++-
src/mesa/drivers/dri/i965/brw_shader.cpp | 1 +
6 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index ab45d3d..bc7304b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1520,6 +1520,7 @@ enum brw_message_target {
#define BRW_URB_OPCODE_WRITE_HWORD 0
#define BRW_URB_OPCODE_WRITE_OWORD 1
+#define BRW_URB_OPCODE_SIMD8_WRITE 7
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 97fefff..815c8c2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -509,6 +509,7 @@ fs_inst::is_send_from_grf() const
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case VS_OPCODE_URB_WRITE:
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return src[1].file == GRF;
@@ -919,6 +920,8 @@ fs_inst::regs_read(fs_visitor *v, int arg) const
return mlen;
} else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
return mlen;
+ } else if (opcode == VS_OPCODE_URB_WRITE && arg == 0) {
+ return mlen;
} else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
return mlen;
} else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
@@ -1013,6 +1016,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+ case VS_OPCODE_URB_WRITE:
return 0;
default:
unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 117b765..67a5cdd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -713,6 +713,7 @@ private:
struct brw_reg implied_header,
GLuint nr);
void generate_fb_write(fs_inst *inst, struct brw_reg payload);
+ void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_blorp_fb_write(fs_inst *inst);
void generate_pixel_xy(struct brw_reg dst, bool is_x);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index cb1d922..a463386 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -236,6 +236,27 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
}
void
+fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
+{ /* Emit one SEND to the URB shared function using the SIMD8 write opcode. */
+ brw_inst *insn;
+ /* Take the next instruction slot from the assembler as a SEND. */
+ insn = brw_next_insn(p, BRW_OPCODE_SEND);
+ /* Null destination; the payload register is src0 and src1 is immediate 0. */
+ brw_set_dest(p, insn, brw_null_reg());
+ brw_set_src0(p, insn, payload);
+ brw_set_src1(p, insn, brw_imm_d(0));
+ /* Select the URB shared function and the SIMD8 write URB opcode. */
+ brw_inst_set_sfid(brw, insn, BRW_SFID_URB);
+ brw_inst_set_urb_opcode(brw, insn, BRW_URB_OPCODE_SIMD8_WRITE);
+ /* mlen, eot and the URB offset are copied from the IR instruction. */
+ brw_inst_set_mlen(brw, insn, inst->mlen);
+ brw_inst_set_rlen(brw, insn, 0); /* rlen is always 0 here. */
+ brw_inst_set_eot(brw, insn, inst->eot);
+ brw_inst_set_header_present(brw, insn, true); /* Header is always flagged. */
+ brw_inst_set_urb_global_offset(brw, insn, inst->offset);
+}
+
+void
fs_generator::generate_blorp_fb_write(fs_inst *inst)
{
brw_fb_WRITE(p,
@@ -1892,6 +1913,10 @@ fs_generator::generate_code(const cfg_t *cfg)
generate_fb_write(inst, src[0]);
break;
+ case VS_OPCODE_URB_WRITE:
+ generate_urb_write(inst, src[0]);
+ break;
+
case FS_OPCODE_BLORP_FB_WRITE:
generate_blorp_fb_write(inst);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 44c74a3..bdbf3de 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -386,6 +386,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
/* Special case instructions which have extra implied registers used. */
switch (inst->opcode) {
case FS_OPCODE_FB_WRITE:
+ case VS_OPCODE_URB_WRITE:
/* We could omit this for the !inst->header_present case, except that
* the simulator apparently incorrectly reads from g0/g1 instead of
* sideband. It also really freaks out driver developers to see g0
@@ -522,6 +523,19 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
}
}
+static bool
+is_last_send(fs_inst *inst) /* True for an FB or URB write that has eot set. */
+{
+ switch (inst->opcode) {
+ case FS_OPCODE_FB_WRITE:
+ case VS_OPCODE_URB_WRITE:
+ return inst->eot; /* For these two opcodes the answer is the eot flag. */
+ default:
+ assert(!inst->eot); /* Any other opcode carrying eot is treated as a bug. */
+ return false;
+ }
+}
+
bool
fs_visitor::assign_regs(bool allow_spilling)
{
@@ -594,7 +608,7 @@ fs_visitor::assign_regs(bool allow_spilling)
* We could just do "something high". Instead, we just pick the
* highest register that works.
*/
- if (inst->opcode == FS_OPCODE_FB_WRITE && inst->eot) {
+ if (is_last_send(inst)) {
int size = virtual_grf_sizes[inst->src[0].reg];
int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
ra_set_node_reg(g, inst->src[0].reg, reg);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 21dcf2d..278e404 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -725,6 +725,7 @@ backend_instruction::has_side_effects() const
switch (opcode) {
case SHADER_OPCODE_UNTYPED_ATOMIC:
case FS_OPCODE_FB_WRITE:
+ case VS_OPCODE_URB_WRITE:
return true;
default:
return false;
--
2.1.0
More information about the mesa-dev
mailing list