[Mesa-dev] [PATCH 14/30] i965/gs: Add GS_OPCODE_URB_WRITE.

Paul Berry stereotype441 at gmail.com
Thu Aug 22 08:14:51 PDT 2013


On 20 August 2013 11:30, Paul Berry <stereotype441 at gmail.com> wrote:

> ---
>  src/mesa/drivers/dri/i965/brw_defines.h     |  9 +++++++++
>  src/mesa/drivers/dri/i965/brw_eu.h          |  6 ++++++
>  src/mesa/drivers/dri/i965/brw_eu_emit.c     |  4 ++--
>  src/mesa/drivers/dri/i965/brw_shader.cpp    |  5 ++++-
>  src/mesa/drivers/dri/i965/brw_vec4.cpp      |  2 ++
>  src/mesa/drivers/dri/i965/brw_vec4.h        |  3 ++-
>  src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 +++++++++++++++++++++--
>  7 files changed, 46 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 2ab0a2b..16a1dbc 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -799,6 +799,15 @@ enum opcode {
>     VS_OPCODE_PULL_CONSTANT_LOAD,
>     VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
>     VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
> +
> +   /**
> +    * Write geometry shader output data to the URB.
> +    *
> +    * Unlike VS_OPCODE_URB_WRITE, this opcode doesn't do an implied move
> from
> +    * R0 to the first MRF.  This allows the geometry shader to override
> the
> +    * "Slot {0,1} Offset" fields in the message header.
> +    */
> +   GS_OPCODE_URB_WRITE,
>  };
>
>  #define BRW_PREDICATE_NONE             0
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h
> b/src/mesa/drivers/dri/i965/brw_eu.h
> index ae4cab5..9053ea2 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -252,6 +252,12 @@ enum brw_urb_write_flags {
>     BRW_URB_WRITE_COMPLETE = 0x8,
>
>     /**
> +    * Indicates that an additional offset (which may be different for the
> two
> +    * vec4 slots) is stored in the message header (gen == 7).
> +    */
> +   BRW_URB_WRITE_PER_SLOT_OFFSET = 0x10,
> +
> +   /**
>      * Convenient combination of flags: end the thread while simultaneously
>      * marking the given URB entry as complete.
>      */
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index 622b22f..b55b57e 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -531,8 +531,8 @@ static void brw_set_urb_message( struct brw_compile *p,
>        insn->bits3.urb_gen7.offset = offset;
>        assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
>        insn->bits3.urb_gen7.swizzle_control = swizzle_control;
> -      /* per_slot_offset = 0 makes it ignore offsets in message header */
> -      insn->bits3.urb_gen7.per_slot_offset = 0;
> +      insn->bits3.urb_gen7.per_slot_offset =
> +         flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
>        insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1
> : 0;
>     } else if (brw->gen >= 5) {
>        insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index afa14c5..d3de6ed 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -485,7 +485,7 @@ brw_instruction_name(enum opcode op)
>        return "placeholder_halt";
>
>     case VS_OPCODE_URB_WRITE:
> -      return "urb_write";
> +      return "vs_urb_write";
>     case VS_OPCODE_SCRATCH_READ:
>        return "scratch_read";
>     case VS_OPCODE_SCRATCH_WRITE:
> @@ -497,6 +497,9 @@ brw_instruction_name(enum opcode op)
>     case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
>        return "unpack_flags_simd4x2";
>
> +   case GS_OPCODE_URB_WRITE:
> +      return "gs_urb_write";
> +
>     default:
>        /* Yes, this leaks.  It's in debug code, it should never occur, and
> if
>         * it does, you should just add the case to the list above.
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index abdf3ab..c978396 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -259,6 +259,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction
> *inst)
>        return 2;
>     case VS_OPCODE_SCRATCH_WRITE:
>        return 3;
> +   case GS_OPCODE_URB_WRITE:
> +      return 0;
>     case SHADER_OPCODE_SHADER_TIME_ADD:
>        return 0;
>     case SHADER_OPCODE_TEX:
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index a398f71..c3e2212 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -627,7 +627,8 @@ private:
>                      struct brw_reg dst,
>                      struct brw_reg src);
>
> -   void generate_urb_write(vec4_instruction *inst);
> +   void generate_vs_urb_write(vec4_instruction *inst);
> +   void generate_gs_urb_write(vec4_instruction *inst);
>     void generate_oword_dual_block_offsets(struct brw_reg m1,
>                                           struct brw_reg index);
>     void generate_scratch_write(vec4_instruction *inst,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index 89831de..681dbdd 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -399,7 +399,7 @@ vec4_generator::generate_tex(vec4_instruction *inst,
>  }
>
>  void
> -vec4_generator::generate_urb_write(vec4_instruction *inst)
> +vec4_generator::generate_vs_urb_write(vec4_instruction *inst)
>  {
>     brw_urb_WRITE(p,
>                  brw_null_reg(), /* dest */
> @@ -413,6 +413,21 @@ vec4_generator::generate_urb_write(vec4_instruction
> *inst)
>  }
>
>  void
> +vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
> +{
> +   struct brw_reg src = brw_message_reg(inst->base_mrf);
> +   brw_urb_WRITE(p,
> +                brw_null_reg(), /* dest */
> +                inst->base_mrf, /* starting mrf reg nr */
> +                src,
> +                 inst->urb_write_flags,
> +                inst->mlen,
> +                0,             /* response len */
> +                inst->offset,  /* urb destination offset */
> +                BRW_URB_SWIZZLE_INTERLEAVE);
> +}
>

From our in-person code review yesterday:

There are space vs. tab issues in the code above.


> +
> +void
>  vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
>                                                    struct brw_reg index)
>  {
> @@ -861,7 +876,7 @@
> vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
>        break;
>
>     case VS_OPCODE_URB_WRITE:
> -      generate_urb_write(inst);
> +      generate_vs_urb_write(inst);
>        break;
>
>     case VS_OPCODE_SCRATCH_READ:
> @@ -880,6 +895,10 @@
> vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
>        generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
>        break;
>
> +   case GS_OPCODE_URB_WRITE:
> +      generate_gs_urb_write(inst);
> +      break;
> +
>     case SHADER_OPCODE_SHADER_TIME_ADD:
>        brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
>        mark_surface_used(SURF_INDEX_VS_SHADER_TIME);
> --
> 1.8.3.4
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130822/806e135d/attachment-0001.html>


More information about the mesa-dev mailing list