[Mesa-dev] [PATCH 14/30] i965/gs: Add GS_OPCODE_URB_WRITE.
Kenneth Graunke
kenneth at whitecape.org
Wed Aug 21 11:05:19 PDT 2013
On 08/20/2013 11:30 AM, Paul Berry wrote:
> ---
> src/mesa/drivers/dri/i965/brw_defines.h | 9 +++++++++
> src/mesa/drivers/dri/i965/brw_eu.h | 6 ++++++
> src/mesa/drivers/dri/i965/brw_eu_emit.c | 4 ++--
> src/mesa/drivers/dri/i965/brw_shader.cpp | 5 ++++-
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 ++
> src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++-
> src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 +++++++++++++++++++++--
> 7 files changed, 46 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 2ab0a2b..16a1dbc 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -799,6 +799,15 @@ enum opcode {
> VS_OPCODE_PULL_CONSTANT_LOAD,
> VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
> VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
> +
> + /**
> + * Write geometry shader output data to the URB.
> + *
> + * Unlike VS_OPCODE_URB_WRITE, this opcode doesn't do an implied move from
> + * R0 to the first MRF. This allows the geometry shader to override the
> + * "Slot {0,1} Offset" fields in the message header.
> + */
> + GS_OPCODE_URB_WRITE,
> };
>
> #define BRW_PREDICATE_NONE 0
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
> index ae4cab5..9053ea2 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -252,6 +252,12 @@ enum brw_urb_write_flags {
> BRW_URB_WRITE_COMPLETE = 0x8,
>
> /**
> + * Indicates that an additional offset (which may be different for the two
> + * vec4 slots) is stored in the message header (gen == 7).
> + */
> + BRW_URB_WRITE_PER_SLOT_OFFSET = 0x10,
> +
> + /**
> * Convenient combination of flags: end the thread while simultaneously
> * marking the given URB entry as complete.
> */
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index 622b22f..b55b57e 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -531,8 +531,8 @@ static void brw_set_urb_message( struct brw_compile *p,
> insn->bits3.urb_gen7.offset = offset;
> assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
> insn->bits3.urb_gen7.swizzle_control = swizzle_control;
> - /* per_slot_offset = 0 makes it ignore offsets in message header */
> - insn->bits3.urb_gen7.per_slot_offset = 0;
> + insn->bits3.urb_gen7.per_slot_offset =
> + flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
> insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
> } else if (brw->gen >= 5) {
> insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index afa14c5..d3de6ed 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -485,7 +485,7 @@ brw_instruction_name(enum opcode op)
> return "placeholder_halt";
>
> case VS_OPCODE_URB_WRITE:
> - return "urb_write";
> + return "vs_urb_write";
> case VS_OPCODE_SCRATCH_READ:
> return "scratch_read";
> case VS_OPCODE_SCRATCH_WRITE:
> @@ -497,6 +497,9 @@ brw_instruction_name(enum opcode op)
> case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
> return "unpack_flags_simd4x2";
>
> + case GS_OPCODE_URB_WRITE:
> + return "gs_urb_write";
> +
> default:
> /* Yes, this leaks. It's in debug code, it should never occur, and if
> * it does, you should just add the case to the list above.
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index abdf3ab..c978396 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -259,6 +259,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
> return 2;
> case VS_OPCODE_SCRATCH_WRITE:
> return 3;
> + case GS_OPCODE_URB_WRITE:
> + return 0;
> case SHADER_OPCODE_SHADER_TIME_ADD:
> return 0;
> case SHADER_OPCODE_TEX:
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
> index a398f71..c3e2212 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -627,7 +627,8 @@ private:
> struct brw_reg dst,
> struct brw_reg src);
>
> - void generate_urb_write(vec4_instruction *inst);
> + void generate_vs_urb_write(vec4_instruction *inst);
> + void generate_gs_urb_write(vec4_instruction *inst);
> void generate_oword_dual_block_offsets(struct brw_reg m1,
> struct brw_reg index);
> void generate_scratch_write(vec4_instruction *inst,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index 89831de..681dbdd 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -399,7 +399,7 @@ vec4_generator::generate_tex(vec4_instruction *inst,
> }
>
> void
> -vec4_generator::generate_urb_write(vec4_instruction *inst)
> +vec4_generator::generate_vs_urb_write(vec4_instruction *inst)
> {
> brw_urb_WRITE(p,
> brw_null_reg(), /* dest */
> @@ -413,6 +413,21 @@ vec4_generator::generate_urb_write(vec4_instruction *inst)
> }
>
> void
> +vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
> +{
> + struct brw_reg src = brw_message_reg(inst->base_mrf);
> + brw_urb_WRITE(p,
> + brw_null_reg(), /* dest */
> + inst->base_mrf, /* starting mrf reg nr */
> + src,
> + inst->urb_write_flags,
> + inst->mlen,
> + 0, /* response len */
> + inst->offset, /* urb destination offset */
> + BRW_URB_SWIZZLE_INTERLEAVE);
> +}
> +
> +void
> vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
> struct brw_reg index)
> {
> @@ -861,7 +876,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
> break;
>
> case VS_OPCODE_URB_WRITE:
> - generate_urb_write(inst);
> + generate_vs_urb_write(inst);
> break;
>
> case VS_OPCODE_SCRATCH_READ:
> @@ -880,6 +895,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
> generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
> break;
>
> + case GS_OPCODE_URB_WRITE:
> + generate_gs_urb_write(inst);
> + break;
> +
> case SHADER_OPCODE_SHADER_TIME_ADD:
> brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
> mark_surface_used(SURF_INDEX_VS_SHADER_TIME);
>
I don't like the idea of adding a new opcode - it really seems like we
should be able to reuse the same one. I see why you did it, though -
the number of implied MRF writes is different.
I have a couple of ideas:
1. Do what you did.
(No extra work, doesn't rock the boat...)
2. Encode implied_mrf_writes as a vec4_instruction field.
We did something similar a while back: rather than having a
fs_inst::regs_written() function that returned a value based on the
opcode, we switched to encoding it as a member (fs_inst::regs_written),
since the same opcode may be 4 on Gen5+ but 8 on Gen4.
This would allow us to reuse the same opcode, but set implied_mrf_writes
to 0 for GS, and 1 for VS.
It might also be nice for the math opcodes, since those currently say
they have 1 implied MRF write, but on Gen6+ they don't use MRFs at all.
3. Remove implicit g0 -> base_mrf writes altogether.
SEND instructions can only implicitly move g0 to the base MRF on Gen4-5;
it can't be done on Gen6+. We could instead do it explicitly, allowing
the scheduler to move them around. To avoid regressing Gen4, we could
add a post-scheduling optimization pass that rewrites "MOV m1 g0; ...;
SEND m1 ..." and rewrites it to "SEND g0 ...".
This seems like a lot of work, and I don't feel comfortable delaying
your series to implement this option.
So, I think my preference is for option #2, and my fall back is option
#1. What do you think?
--Ken
More information about the mesa-dev
mailing list