[Mesa-dev] [PATCH 7/9] i965/gs: Add opcodes needed for EndPrimitive().
Kenneth Graunke
kenneth at whitecape.org
Mon Sep 9 23:39:48 PDT 2013
On 09/09/2013 08:20 AM, Paul Berry wrote:
> ---
> src/mesa/drivers/dri/i965/brw_defines.h | 21 ++++++++
> src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++
> src/mesa/drivers/dri/i965/brw_vec4.h | 2 +
> src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 81 +++++++++++++++++++++++++++++
> 4 files changed, 108 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 4742103..7b53c68 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -847,6 +847,27 @@ enum opcode {
> * scratch reads and writes to operate correctly.
> */
> GS_OPCODE_SET_DWORD_2_IMMED,
> +
> + /**
> + * Prepare the dst register for storage in the "Channel Mask" fields of a
> + * URB_WRITE message header.
> + *
> + * DWORD 4 of dst is shifted left by 4 bits, so that later,
> + * GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
> + * final channel mask.
> + */
> + GS_OPCODE_PREPARE_CHANNEL_MASKS,
> +
> + /**
> + * Set the "Channel Mask" fields of a URB_WRITE message header.
> + *
> + * - dst is the MRF containing the message header.
> + *
> + * - src.x is the channel mask, as prepared by
> + * GS_OPCODE_PREPARE_CHANNEL_MASKS. DWORDs 0 and 4 are OR'ed together to
> + * form the final channel mask.
> + */
> + GS_OPCODE_SET_CHANNEL_MASKS,
> };
>
> #define BRW_PREDICATE_NONE 0
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index e7dbdbe..53364a5 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -507,6 +507,10 @@ brw_instruction_name(enum opcode op)
> return "set_vertex_count";
> case GS_OPCODE_SET_DWORD_2_IMMED:
> return "set_dword_2_immed";
> + case GS_OPCODE_PREPARE_CHANNEL_MASKS:
> + return "prepare_channel_masks";
> + case GS_OPCODE_SET_CHANNEL_MASKS:
> + return "set_channel_masks";
>
> default:
> /* Yes, this leaks. It's in debug code, it should never occur, and if
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
> index c5101d3..cba5cd4 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -610,6 +610,8 @@ private:
> void generate_gs_set_vertex_count(struct brw_reg dst,
> struct brw_reg src);
> void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src);
> + void generate_gs_prepare_channel_masks(struct brw_reg dst);
> + void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
> void generate_oword_dual_block_offsets(struct brw_reg m1,
> struct brw_reg index);
> void generate_scratch_write(vec4_instruction *inst,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index bf04bd9..12e1b50 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -516,6 +516,79 @@ vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
> }
>
> void
> +vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
> +{
> + /* We want to left shift just DWORD 4 (the x component belonging to the
> + * second geometry shader invocation) by 4 bits. So generate the
> + * instruction:
> + *
> + * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
> + */
> + dst = suboffset(vec1(dst), 4);
> + brw_push_insn_state(p);
> + brw_set_access_mode(p, BRW_ALIGN_1);
> + brw_set_mask_control(p, BRW_MASK_DISABLE);
> + brw_SHL(p, dst, dst, brw_imm_ud(4));
> + brw_pop_insn_state(p);
> +}
> +
> +void
> +vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
> + struct brw_reg src)
> +{
> + /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
> + * Header: M0.5):
> + *
> + * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
> + *
> + * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
> + * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
> + * Vertex 0 DATA[7]. This bit is ANDed with the corresponding
> + * channel enable to determine the final channel enable. For the
> + * URB_READ_OWORD & URB_READ_HWORD messages, when final channel
> + * enable is 1 it indicates that Vertex 1 DATA [3] will be included
> + * in the writeback message. For the URB_WRITE_OWORD &
> + * URB_WRITE_HWORD messages, when final channel enable is 1 it
> + * indicates that Vertex 1 DATA [3] will be written to the surface.
> + *
> + * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
> + * 1: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel included
> + *
> + * 14 Vertex 1 DATA [2] Channel Mask
> + * 13 Vertex 1 DATA [1] Channel Mask
> + * 12 Vertex 1 DATA [0] Channel Mask
> + * 11 Vertex 0 DATA [3] Channel Mask
> + * 10 Vertex 0 DATA [2] Channel Mask
> + * 9 Vertex 0 DATA [1] Channel Mask
> + * 8 Vertex 0 DATA [0] Channel Mask
> + *
> + * (This is from a section of the PRM that is agnostic to the particular
> + * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
> + * geometry shader invocations 0 and 1, respectively). Since we have the
> + * enable flags for geometry shader invocation 0 in bits 0-3 of DWORD 0,
> + * and the enable flags for geometry shader invocation 1 in bits 4-7 of
> + * DWORD 4, we just need to OR them together and store the result in bits
> + * 15-8 of DWORD 5.
One thing isn't entirely clear to me here: are bits 7:4 of DWord 0 and
bits 3:0 of DWord 4 both guaranteed to be zero? If not, OR'ing the two
bytes together will produce a mishmash of the bits you want and whatever
other rubbish happens to be there.
I assume it works out, but could you expand the comment to explain why?
One other nitpick: you list bits as 0-3, 4-7, and 15-8. Might be nice
to pick a consistent order. Usually the hardware docs use "High:Low".
Otherwise, I verified your math. Nice use of UB types :)
> + *
> + * It's easier to get the EU to do this if we think of the src and dst
> + * registers as composed of 32 bytes each; then, we want to pick up the
> + * contents of bytes 0 and 16 from src, OR them together, and store them in
> + * byte 21.
> + *
> + * We can do that by the following EU instruction:
> + *
> + * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
> + */
> + dst = retype(dst, BRW_REGISTER_TYPE_UB);
> + src = retype(src, BRW_REGISTER_TYPE_UB);
> + brw_push_insn_state(p);
> + brw_set_access_mode(p, BRW_ALIGN_1);
> + brw_set_mask_control(p, BRW_MASK_DISABLE);
> + brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
> + brw_pop_insn_state(p);
> +}
> +
> +void
> vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
> struct brw_reg index)
> {
> @@ -1003,6 +1076,14 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
> generate_gs_set_dword_2_immed(dst, src[0]);
> break;
>
> + case GS_OPCODE_PREPARE_CHANNEL_MASKS:
> + generate_gs_prepare_channel_masks(dst);
> + break;
> +
> + case GS_OPCODE_SET_CHANNEL_MASKS:
> + generate_gs_set_channel_masks(dst, src[0]);
> + break;
> +
> case SHADER_OPCODE_SHADER_TIME_ADD:
> brw_shader_time_add(p, src[0], SURF_INDEX_VEC4_SHADER_TIME);
> mark_surface_used(SURF_INDEX_VEC4_SHADER_TIME);
>
More information about the mesa-dev mailing list