[Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.

Jordan Justen jljusten at gmail.com
Thu Sep 18 00:48:45 PDT 2014


On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga <itoral at igalia.com> wrote:
> From: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
>
> We will use this parameter in later patches to provide information relevant
> to transform feedback that needs to be set as part of the FF_SYNC message.
>
> Signed-off-by: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h          |  4 ++++
>  src/mesa/drivers/dri/i965/brw_vec4.h             |  3 ++-
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +++++++++++++---
>  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp    |  3 ++-
>  4 files changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 6e8b998..b0d6d9f 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1030,6 +1030,10 @@ enum opcode {
>      *   FF_SYNC operation.
>      *
>      * - src1 is the number of primitives written.
> +    *
> +    * - src2 is the value to hold in M0.0: number of SO vertices to write
> +    *   and number of SO primitives needed. Its value will be overwritten
> +    *   with the SVBI values if transform feedback is enabled.
>      */
>     GS_OPCODE_FF_SYNC,
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 763cb23..58a5aac 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -679,7 +679,8 @@ private:
>                                             struct brw_reg src2);
>     void generate_gs_ff_sync(struct brw_reg dst,
>                              struct brw_reg src0,
> -                            struct brw_reg src1);
> +                            struct brw_reg src1,
> +                            struct brw_reg src2);
>     void generate_gs_set_primitive_id(struct brw_reg dst);
>     void generate_oword_dual_block_offsets(struct brw_reg m1,
>                                           struct brw_reg index);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index d4554f5..c69b305 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
>  void
>  vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
>                                      struct brw_reg src0,
> -                                    struct brw_reg src1)
> +                                    struct brw_reg src1,
> +                                    struct brw_reg src2)
>  {
>     /* We use dst to setup the ff_sync header, so we expect it to be
>      * initialized to R0 by the caller. Here we overwrite dword 0 (cleared
> @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
>     brw_push_insn_state(p);
>     brw_set_default_mask_control(p, BRW_MASK_DISABLE);
>     brw_set_default_access_mode(p, BRW_ALIGN_1);
> -   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
> +   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0));
>     brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
>     brw_set_default_access_mode(p, BRW_ALIGN_16);
>     brw_pop_insn_state(p);
> @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
>     brw_set_default_access_mode(p, BRW_ALIGN_1);
>     brw_set_default_mask_control(p, BRW_MASK_DISABLE);
>     brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
> +
> +   /* src2 is not an immediate when we use transform feedback */
> +   if (src2.file != BRW_IMMEDIATE_VALUE) {
> +      brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1));
> +      brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2));
> +      brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3));
> +      brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4));

Ken and I discussed this a bit. Ken suggested that this:
brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));

Should be able to copy all 4 dwords in one instruction. What do you think?

By the way, this was for the version of this patch on the
gs-support-snb-for-submission-02092014 which has src1 as the
destination and dst as the source for the moves. (Hmm, not sure about
the src1 naming in this context...)

If that change seem good, then
Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>

> +   }
> +
>     brw_set_default_access_mode(p, BRW_ALIGN_16);
>     brw_pop_insn_state(p);
>  }
> @@ -1374,7 +1384,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
>        break;
>
>     case GS_OPCODE_FF_SYNC:
> -      generate_gs_ff_sync(dst, src[0], src[1]);
> +      generate_gs_ff_sync(dst, src[0], src[1], src[2]);
>        break;
>
>     case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> index b45c381..c1cfe75 100644
> --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> @@ -331,7 +331,8 @@ gen6_gs_visitor::emit_thread_end()
>     {
>        this->current_annotation = "gen6 thread end: ff_sync";
>        emit(GS_OPCODE_FF_SYNC,
> -           dst_reg(MRF, base_mrf), this->temp, this->prim_count);
> +           dst_reg(MRF, base_mrf), this->temp, this->prim_count,
> +           brw_imm_ud(0u));
>
>        /* Loop over all buffered vertices and emit URB write messages */
>        this->current_annotation = "gen6 thread end: urb writes init";
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list