[Mesa-dev] [PATCH 7/9] i965/gs: Add opcodes needed for EndPrimitive().
Paul Berry
stereotype441 at gmail.com
Mon Sep 9 08:20:44 PDT 2013
---
src/mesa/drivers/dri/i965/brw_defines.h | 21 ++++++++
src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++
src/mesa/drivers/dri/i965/brw_vec4.h | 2 +
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 81 +++++++++++++++++++++++++++++
4 files changed, 108 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 4742103..7b53c68 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -847,6 +847,27 @@ enum opcode {
* scratch reads and writes to operate correctly.
*/
GS_OPCODE_SET_DWORD_2_IMMED,
+
+ /**
+ * Prepare the dst register for storage in the "Channel Mask" fields of a
+ * URB_WRITE message header.
+ *
+ * DWORD 4 of dst is shifted left by 4 bits, so that later,
+ * GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
+ * final channel mask.
+ */
+ GS_OPCODE_PREPARE_CHANNEL_MASKS,
+
+ /**
+ * Set the "Channel Mask" fields of a URB_WRITE message header.
+ *
+ * - dst is the MRF containing the message header.
+ *
+ * - src.x is the channel mask, as prepared by
+ * GS_OPCODE_PREPARE_CHANNEL_MASKS. DWORDs 0 and 4 are OR'ed together to
+ * form the final channel mask.
+ */
+ GS_OPCODE_SET_CHANNEL_MASKS,
};
#define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index e7dbdbe..53364a5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -507,6 +507,10 @@ brw_instruction_name(enum opcode op)
return "set_vertex_count";
case GS_OPCODE_SET_DWORD_2_IMMED:
return "set_dword_2_immed";
+ case GS_OPCODE_PREPARE_CHANNEL_MASKS:
+ return "prepare_channel_masks";
+ case GS_OPCODE_SET_CHANNEL_MASKS:
+ return "set_channel_masks";
default:
/* Yes, this leaks. It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index c5101d3..cba5cd4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -610,6 +610,8 @@ private:
void generate_gs_set_vertex_count(struct brw_reg dst,
struct brw_reg src);
void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src);
+ void generate_gs_prepare_channel_masks(struct brw_reg dst);
+ void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
void generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index);
void generate_scratch_write(vec4_instruction *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index bf04bd9..12e1b50 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -516,6 +516,79 @@ vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
}
void
+vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
+{
+ /* We want to left shift just DWORD 4 (the x component belonging to the
+ * second geometry shader invocation) by 4 bits. So generate the
+ * instruction:
+ *
+ * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
+ */
+ dst = suboffset(vec1(dst), 4);
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_SHL(p, dst, dst, brw_imm_ud(4));
+ brw_pop_insn_state(p);
+}
+
+void
+vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
+ struct brw_reg src)
+{
+ /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
+ * Header: M0.5):
+ *
+ * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
+ *
+ * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
+ * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
+ * Vertex 0 DATA[7]. This bit is ANDed with the corresponding
+ * channel enable to determine the final channel enable. For the
+ * URB_READ_OWORD & URB_READ_HWORD messages, when final channel
+ * enable is 1 it indicates that Vertex 1 DATA [3] will be included
+ * in the writeback message. For the URB_WRITE_OWORD &
+ * URB_WRITE_HWORD messages, when final channel enable is 1 it
+ * indicates that Vertex 1 DATA [3] will be written to the surface.
+ *
+ * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
+ * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
+ *
+ * 14 Vertex 1 DATA [2] Channel Mask
+ * 13 Vertex 1 DATA [1] Channel Mask
+ * 12 Vertex 1 DATA [0] Channel Mask
+ * 11 Vertex 0 DATA [3] Channel Mask
+ * 10 Vertex 0 DATA [2] Channel Mask
+ * 9 Vertex 0 DATA [1] Channel Mask
+ * 8 Vertex 0 DATA [0] Channel Mask
+ *
+ * (This is from a section of the PRM that is agnostic to the particular
+ * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
+ * geometry shader invocations 0 and 1, respectively). Since we have the
+ * enable flags for geometry shader invocation 0 in bits 0-3 of DWORD 0,
+ * and the enable flags for geometry shader invocation 1 in bits 4-7 of
+ * DWORD 4, we just need to OR them together and store the result in bits
+ * 15-8 of DWORD 5.
+ *
+ * It's easier to get the EU to do this if we think of the src and dst
+ * registers as composed of 32 bytes each; then, we want to pick up the
+ * contents of bytes 0 and 16 from src, OR them together, and store them in
+ * byte 21.
+ *
+ * We can do that by the following EU instruction:
+ *
+ * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
+ */
+ dst = retype(dst, BRW_REGISTER_TYPE_UB);
+ src = retype(src, BRW_REGISTER_TYPE_UB);
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
+ brw_pop_insn_state(p);
+}
+
+void
vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index)
{
@@ -1003,6 +1076,14 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
generate_gs_set_dword_2_immed(dst, src[0]);
break;
+ case GS_OPCODE_PREPARE_CHANNEL_MASKS:
+ generate_gs_prepare_channel_masks(dst);
+ break;
+
+ case GS_OPCODE_SET_CHANNEL_MASKS:
+ generate_gs_set_channel_masks(dst, src[0]);
+ break;
+
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0], SURF_INDEX_VEC4_SHADER_TIME);
mark_surface_used(SURF_INDEX_VEC4_SHADER_TIME);
--
1.8.4
More information about the mesa-dev
mailing list