[Mesa-dev] [PATCH 09/22] i965/gs: Allocate push constant space for use by GS.

Paul Berry stereotype441 at gmail.com
Thu Aug 29 10:22:59 PDT 2013


On 28 August 2013 17:58, Kenneth Graunke <kenneth at whitecape.org> wrote:

> On 08/26/2013 03:12 PM, Paul Berry wrote:
>
>> Previously, we would always use the same push constant allocation
>> regardless of what shader programs were being run: the available push
>> constant space was split into 2 equal size partitions, one for the
>> vertex shader, and one for the fragment shader.
>>
>> Now that we are adding geometry shader support, we need to do
>> something smarter.  This patch adjusts things so that when a geometry
>> shader is in use, we split the available push constant space into 3
>> nearly-equal size partitions instead of 2.
>>
>> Since the push constant allocation is now affected by GL state, it can
>> no longer be set up by brw_upload_initial_gpu_state()**; instead it must
>> be set up by a state atom.
>> ---
>>   src/mesa/drivers/dri/i965/brw_**context.h      |   3 +-
>>   src/mesa/drivers/dri/i965/brw_**defines.h      |   1 +
>>   src/mesa/drivers/dri/i965/brw_**state.h        |   4 +-
>>   src/mesa/drivers/dri/i965/brw_**state_upload.c |   5 +-
>>   src/mesa/drivers/dri/i965/**gen7_blorp.cpp     |   6 ++
>>   src/mesa/drivers/dri/i965/**gen7_urb.c         | 101
>> +++++++++++++++++++++++----
>>   6 files changed, 98 insertions(+), 22 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/**brw_context.h
>> b/src/mesa/drivers/dri/i965/**brw_context.h
>> index 77f2a6b..95f9bb2 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/**brw_context.h
>> @@ -1508,7 +1508,8 @@ gen6_get_sample_position(**struct gl_context *ctx,
>>
>>   /* gen7_urb.c */
>>   void
>> -gen7_allocate_push_constants(**struct brw_context *brw);
>> +gen7_emit_push_constant_**state(struct brw_context *brw, unsigned
>> vs_size,
>> +                              unsigned gs_size, unsigned fs_size);
>>
>>   void
>>   gen7_emit_urb_state(struct brw_context *brw,
>> diff --git a/src/mesa/drivers/dri/i965/**brw_defines.h
>> b/src/mesa/drivers/dri/i965/**brw_defines.h
>> index 832ff55..8d9a824 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_defines.h
>> +++ b/src/mesa/drivers/dri/i965/**brw_defines.h
>> @@ -1284,6 +1284,7 @@ enum brw_message_target {
>>   # define GEN7_URB_STARTING_ADDRESS_**SHIFT                25
>>
>>   #define _3DSTATE_PUSH_CONSTANT_ALLOC_**VS         0x7912 /* GEN7+ */
>> +#define _3DSTATE_PUSH_CONSTANT_ALLOC_**GS         0x7915 /* GEN7+ */
>>   #define _3DSTATE_PUSH_CONSTANT_ALLOC_**PS         0x7916 /* GEN7+ */
>>   # define GEN7_PUSH_CONSTANT_BUFFER_**OFFSET_SHIFT         16
>>
>> diff --git a/src/mesa/drivers/dri/i965/**brw_state.h
>> b/src/mesa/drivers/dri/i965/**brw_state.h
>> index 85f82fe..4814639 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_state.h
>> +++ b/src/mesa/drivers/dri/i965/**brw_state.h
>> @@ -112,6 +112,7 @@ extern const struct brw_tracked_state
>> gen7_cc_viewport_state_**pointer;
>>   extern const struct brw_tracked_state gen7_clip_state;
>>   extern const struct brw_tracked_state gen7_disable_stages;
>>   extern const struct brw_tracked_state gen7_ps_state;
>> +extern const struct brw_tracked_state gen7_push_constant_space;
>>   extern const struct brw_tracked_state gen7_sbe_state;
>>   extern const struct brw_tracked_state gen7_sf_clip_viewport;
>>   extern const struct brw_tracked_state gen7_sf_state;
>> @@ -220,9 +221,6 @@ uint32_t
>>   get_attr_override(const struct brw_vue_map *vue_map, int
>> urb_entry_read_offset,
>>                     int fs_attr, bool two_side_color, uint32_t
>> *max_source_attr);
>>
>> -/* gen7_urb.c */
>> -void gen7_allocate_push_constants(**struct brw_context *brw);
>> -
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/src/mesa/drivers/dri/i965/**brw_state_upload.c
>> b/src/mesa/drivers/dri/i965/**brw_state_upload.c
>> index b883002..9638c69 100644
>> --- a/src/mesa/drivers/dri/i965/**brw_state_upload.c
>> +++ b/src/mesa/drivers/dri/i965/**brw_state_upload.c
>> @@ -188,6 +188,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
>>      &gen7_cc_viewport_state_**pointer, /* must do after brw_cc_vp */
>>      &gen7_sf_clip_viewport,
>>
>> +   &gen7_push_constant_space,
>>      &gen7_urb,
>>      &gen6_blend_state,         /* must do before cc unit */
>>      &gen6_color_calc_state,    /* must do before cc unit */
>> @@ -251,10 +252,6 @@ brw_upload_initial_gpu_state(**struct brw_context
>> *brw)
>>         return;
>>
>>      brw_upload_invariant_state(**brw);
>> -
>> -   if (brw->gen >= 7) {
>> -      gen7_allocate_push_constants(**brw);
>> -   }
>>   }
>>
>>   void brw_init_state( struct brw_context *brw )
>> diff --git a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp
>> b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp
>> index 6c798b1..9df3d92 100644
>> --- a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp
>> +++ b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp
>> @@ -51,6 +51,12 @@ static void
>>   gen7_blorp_emit_urb_config(**struct brw_context *brw,
>>                              const brw_blorp_params *params)
>>   {
>> +   unsigned urb_size = (brw->is_haswell && brw->gt == 3) ? 32 : 16;
>> +   gen7_emit_push_constant_state(**brw,
>> +                                 urb_size / 2 /* vs_size */,
>> +                                 0 /* gs_size */,
>> +                                 urb_size / 2 /* fs_size */);
>> +
>>      /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS,
>> Dword
>>       * 1.15:0 "VS Number of URB Entries".
>>       */
>> diff --git a/src/mesa/drivers/dri/i965/**gen7_urb.c
>> b/src/mesa/drivers/dri/i965/**gen7_urb.c
>> index 2d10cc12..4dc8f6e 100644
>> --- a/src/mesa/drivers/dri/i965/**gen7_urb.c
>> +++ b/src/mesa/drivers/dri/i965/**gen7_urb.c
>> @@ -30,12 +30,12 @@
>>   /**
>>    * The following diagram shows how we partition the URB:
>>    *
>> - *      8kB         8kB              Rest of the URB space
>> - *   ____-____   ____-____   _________________-____________**_____
>> - *  /         \ /         \ /                                   \
>> + *        16kB or 32kB               Rest of the URB space
>> + *   __________-__________   _________________-____________**_____
>> + *  /                     \ /                                   \
>>    * +-----------------------------**------------------------------**--+
>> - * | VS Push   | FS Push   | VS                                  |
>> - * | Constants | Constants | Handles                             |
>> + * |     VS/FS/GS Push     |              VS/GS URB              |
>> + * |       Constants       |               Entries               |
>>    * +-----------------------------**------------------------------**--+
>>    *
>>    * Notably, push constants must be stored at the beginning of the URB
>> @@ -43,8 +43,12 @@
>>    * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell
>> GT3
>>    * doubles this (32kB).
>>    *
>> - * Currently we split the constant buffer space evenly between VS and FS.
>> - * This is probably not ideal, but simple.
>> + * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and
>> + * sized) in increments of 1kB.  Haswell GT3 requires them to be located
>> and
>> + * sized in increments of 2kB.
>> + *
>> + * Currently we split the constant buffer space evenly among whatever
>> stages
>> + * are active.  This is probably not ideal, but simple.
>>    *
>>    * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
>>    * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
>> @@ -53,22 +57,91 @@
>>    * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b:
>> Configurations",
>>    * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_**xS.
>>    */
>> -void
>> +static void
>>   gen7_allocate_push_constants(**struct brw_context *brw)
>>   {
>> -   unsigned size = 8;
>> -   if (brw->is_haswell && brw->gt == 3)
>> -      size = 16;
>> +   unsigned avail_size = 16;
>> +   unsigned multiplier = (brw->is_haswell && brw->gt == 3) ? 2 : 1;
>> +
>> +   /* BRW_NEW_GEOMETRY_PROGRAM */
>> +   bool gs_present = brw->geometry_program;
>> +
>> +   unsigned vs_size, gs_size;
>> +   if (gs_present) {
>> +      vs_size = avail_size / 3;
>> +      avail_size -= vs_size;
>> +      gs_size = avail_size / 2;
>> +      avail_size -= gs_size;
>> +   } else {
>> +      vs_size = avail_size / 2;
>> +      avail_size -= vs_size;
>> +      gs_size = 0;
>> +   }
>> +   unsigned fs_size = avail_size;
>> +
>> +   gen7_emit_push_constant_state(**brw, multiplier * vs_size,
>> +                                 multiplier * gs_size, multiplier *
>> fs_size);
>> +}
>> +
>> +void
>> +gen7_emit_push_constant_**state(struct brw_context *brw, unsigned
>> vs_size,
>> +                              unsigned gs_size, unsigned fs_size)
>> +{
>> +   unsigned offset = 0;
>>
>> -   BEGIN_BATCH(4);
>> +   BEGIN_BATCH(6);
>>      OUT_BATCH(_3DSTATE_PUSH_**CONSTANT_ALLOC_VS << 16 | (2 - 2));
>> -   OUT_BATCH(size);
>> +   OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_**
>> OFFSET_SHIFT);
>> +   offset += vs_size;
>> +
>> +   OUT_BATCH(_3DSTATE_PUSH_**CONSTANT_ALLOC_GS << 16 | (2 - 2));
>> +   OUT_BATCH(gs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_**
>> OFFSET_SHIFT);
>> +   offset += gs_size;
>>
>>      OUT_BATCH(_3DSTATE_PUSH_**CONSTANT_ALLOC_PS << 16 | (2 - 2));
>> -   OUT_BATCH(size | size << GEN7_PUSH_CONSTANT_BUFFER_**OFFSET_SHIFT);
>> +   OUT_BATCH(offset | fs_size << GEN7_PUSH_CONSTANT_BUFFER_**
>> OFFSET_SHIFT);
>>      ADVANCE_BATCH();
>> +
>> +   /* From p292 of the Ivy Bridge PRM (11.2.4
>> 3DSTATE_PUSH_CONSTANT_ALLOC_**PS):
>> +    *
>> +    *     A PIPE_CONTOL command with the CS Stall bit set must be
>> programmed
>> +    *     in the ring after this instruction.
>> +    *
>> +    * No such restriction exists for Haswell.
>> +    */
>> +   if (!brw->is_haswell) {
>> +      BEGIN_BATCH(4);
>> +      OUT_BATCH(_3DSTATE_PIPE_**CONTROL | (4 - 2));
>> +      /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command:
>> DW1[20]
>> +       * CS Stall):
>> +       *
>> +       *     One of the following must also be set:
>> +       *     - Render Target Cache Flush Enable ([12] of DW1)
>> +       *     - Depth Cache Flush Enable ([0] of DW1)
>> +       *     - Stall at Pixel Scoreboard ([1] of DW1)
>> +       *     - Depth Stall ([13] of DW1)
>> +       *     - Post-Sync Operation ([13] of DW1)
>> +       *
>> +       * We choose to do a Post-Sync Operation (Write Immediate Data),
>> since
>> +       * it seems like it will incur the least additional performance
>> penalty.
>> +       */
>> +      OUT_BATCH(PIPE_CONTROL_CS_**STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
>> +      OUT_RELOC(brw->batch.**workaround_bo,
>> +                I915_GEM_DOMAIN_INSTRUCTION,
>> I915_GEM_DOMAIN_INSTRUCTION, 0);
>> +      OUT_BATCH(0);
>> +      ADVANCE_BATCH();
>> +   }
>>
>
> This is really a bug fix, since it's implementing a hardware workaround on
> a packet we've always been emitting.  I'd really like to see it in a
> separate patch, for easier bisectability.
>

Sure, no problem.  It turns out this was really easy to split out to its
own patch.


>
> Interestingly, this text doesn't appear in the latest documentation, and I
> don't see it in the workaround database.  It is definitely there in the
> PRM, though.  I'm not sure what to make of it.


I see it in the current bspec, at the top of
3DSTATE_PUSH_CONSTANT_ALLOC_PS, in the "Description" section.


>
>
>    }
>>
>> +const struct brw_tracked_state gen7_push_constant_space = {
>> +   .dirty = {
>> +      .mesa = 0,
>> +      .brw = BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM,
>>
>
> It's worth noting that this will cause a full pipeline stall whenever the
> current geometry shader changes (since 3DSTATE_PUSH_CONSTANT_ALLOC_* are
> non-pipelined).  We could avoid this when switching between two different
> geometry shaders.  But I suspect it's premature to care about that.  Not
> sure it'll matter much anyway.
>
>
>  +      .cache = 0,
>> +   },
>> +   .emit = gen7_allocate_push_constants,
>> +};
>> +
>>   static void
>>   gen7_upload_urb(struct brw_context *brw)
>>   {
>>
>>
> Other than pulling the PIPE_CONTROL into a separate patch, this looks fine
> to me.
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130829/e120b76e/attachment-0001.html>


More information about the mesa-dev mailing list