[Mesa-dev] [PATCH 3/3] i965: Port Gen4-5 VS_STATE to genxml.

Rafael Antognolli rafael.antognolli at intel.com
Thu May 11 18:17:43 UTC 2017


Ugh, and I also forgot: you can remove brw_vs_unit from brw_state.h too.

On Thu, May 11, 2017 at 09:54:13AM -0700, Rafael Antognolli wrote:
> On Wed, May 10, 2017 at 12:41:39PM -0700, Kenneth Graunke wrote:
> > It's actually not that much code.
> > ---
> >  src/intel/genxml/gen4.xml                     |   2 +-
> >  src/intel/genxml/gen45.xml                    |   2 +-
> >  src/intel/genxml/gen5.xml                     |   2 +-
> >  src/mesa/drivers/dri/i965/Makefile.sources    |   1 -
> >  src/mesa/drivers/dri/i965/brw_vs_state.c      | 192 --------------------------
> >  src/mesa/drivers/dri/i965/genX_state_upload.c |  74 +++++++++-
> >  6 files changed, 71 insertions(+), 202 deletions(-)
> >  delete mode 100644 src/mesa/drivers/dri/i965/brw_vs_state.c
> > 
> > diff --git a/src/intel/genxml/gen4.xml b/src/intel/genxml/gen4.xml
> > index f4d58879bcc..da34d6ca19b 100644
> > --- a/src/intel/genxml/gen4.xml
> > +++ b/src/intel/genxml/gen4.xml
> > @@ -750,13 +750,13 @@
> >      <field name="URB Entry Allocation Size" start="147" end="151" type="uint"/>
> >      <field name="Number of URB Entries" start="139" end="146" type="uint"/>
> >      <field name="Statistics Enable" start="138" end="138" type="bool"/>
> >      <field name="Sampler State Offset" start="165" end="191" type="address"/>
> >      <field name="Sampler Count" start="160" end="162" type="uint"/>
> >      <field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
> > -    <field name="Function Enable" start="192" end="192" type="bool"/>
> > +    <field name="Enable" start="192" end="192" type="bool"/>
> >    </struct>
> >  
> >    <struct name="WM_STATE" length="8">
> >      <field name="Kernel Start Pointer" start="6" end="31" type="address"/>
> >      <field name="GRF Register Count" start="1" end="3" type="uint"/>
> >      <field name="Single Program Flow" start="63" end="63" type="bool"/>
> > diff --git a/src/intel/genxml/gen45.xml b/src/intel/genxml/gen45.xml
> > index 4556322fee5..0e95cf27e89 100644
> > --- a/src/intel/genxml/gen45.xml
> > +++ b/src/intel/genxml/gen45.xml
> > @@ -701,13 +701,13 @@
> >      <field name="URB Entry Allocation Size" start="147" end="151" type="uint"/>
> >      <field name="Number of URB Entries" start="139" end="146" type="uint"/>
> >      <field name="Statistics Enable" start="138" end="138" type="bool"/>
> >      <field name="Sampler State Offset" start="165" end="191" type="address"/>
> >      <field name="Sampler Count" start="160" end="162" type="uint"/>
> >      <field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
> > -    <field name="Function Enable" start="192" end="192" type="bool"/>
> > +    <field name="Enable" start="192" end="192" type="bool"/>
> >    </struct>
> >  
> >    <struct name="WM_STATE" length="8">
> >      <field name="Kernel Start Pointer" start="6" end="31" type="address"/>
> >      <field name="GRF Register Count" start="1" end="3" type="uint"/>
> >      <field name="Single Program Flow" start="63" end="63" type="bool"/>
> > diff --git a/src/intel/genxml/gen5.xml b/src/intel/genxml/gen5.xml
> > index 3d80de9cf1e..c9a18dd18e3 100644
> > --- a/src/intel/genxml/gen5.xml
> > +++ b/src/intel/genxml/gen5.xml
> > @@ -857,13 +857,13 @@
> >      <field name="URB Entry Allocation Size" start="147" end="151" type="uint"/>
> >      <field name="Number of URB Entries" start="139" end="146" type="uint"/>
> >      <field name="Statistics Enable" start="138" end="138" type="bool"/>
> >      <field name="Sampler State Offset" start="165" end="191" type="address"/>
> >      <field name="Sampler Count" start="160" end="162" type="uint"/>
> >      <field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
> > -    <field name="Function Enable" start="192" end="192" type="bool"/>
> > +    <field name="Enable" start="192" end="192" type="bool"/>
> >    </struct>
> >  
> >    <struct name="WM_STATE" length="11">
> >      <field name="Kernel Start Pointer" start="6" end="31" type="offset"/>
> >      <field name="GRF Register Count" start="1" end="3" type="uint"/>
> >      <field name="Single Program Flow" start="63" end="63" type="bool"/>
> > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> > index 9e567cbc908..349777cf24a 100644
> > --- a/src/mesa/drivers/dri/i965/Makefile.sources
> > +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> > @@ -67,13 +67,12 @@ i965_FILES = \
> >  	brw_tex_layout.c \
> >  	brw_urb.c \
> >  	brw_util.c \
> >  	brw_util.h \
> >  	brw_vs.c \
> >  	brw_vs.h \
> > -	brw_vs_state.c \
> >  	brw_vs_surface_state.c \
> >  	brw_wm.c \
> >  	brw_wm.h \
> >  	brw_wm_state.c \
> >  	brw_wm_surface_state.c \
> >  	gen6_clip_state.c \
> > diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
> > deleted file mode 100644
> > index fafe305f4f7..00000000000
> > --- a/src/mesa/drivers/dri/i965/brw_vs_state.c
> > +++ /dev/null
> > @@ -1,192 +0,0 @@
> > -/*
> > - Copyright (C) Intel Corp.  2006.  All Rights Reserved.
> > - Intel funded Tungsten Graphics to
> > - develop this 3D driver.
> > -
> > - Permission is hereby granted, free of charge, to any person obtaining
> > - a copy of this software and associated documentation files (the
> > - "Software"), to deal in the Software without restriction, including
> > - without limitation the rights to use, copy, modify, merge, publish,
> > - distribute, sublicense, and/or sell copies of the Software, and to
> > - permit persons to whom the Software is furnished to do so, subject to
> > - the following conditions:
> > -
> > - The above copyright notice and this permission notice (including the
> > - next paragraph) shall be included in all copies or substantial
> > - portions of the Software.
> > -
> > - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> > - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> > - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
> > - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
> > - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
> > - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > -
> > - **********************************************************************/
> > - /*
> > -  * Authors:
> > -  *   Keith Whitwell <keithw at vmware.com>
> > -  */
> > -
> > -
> > -
> > -#include "intel_batchbuffer.h"
> > -#include "brw_context.h"
> > -#include "brw_state.h"
> > -#include "brw_defines.h"
> > -#include "main/macros.h"
> > -
> > -static void
> > -brw_upload_vs_unit(struct brw_context *brw)
> > -{
> > -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> > -   struct brw_stage_state *stage_state = &brw->vs.base;
> > -   const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
> > -   const struct brw_vue_prog_data *vue_prog_data =
> > -      brw_vue_prog_data(stage_state->prog_data);
> > -
> > -   struct brw_vs_unit_state *vs;
> 
> It looks like brw_vs_unit_state is not used anywhere else, so maybe we could
> delete it too. Also, it looks weird that some fields in vs->thread0 don't
> really match the dw1 in the docs for gen5 at least, but they are not used
> anyway.
> 
> > -   vs = brw_state_batch(brw, sizeof(*vs), 32, &stage_state->state_offset);
> > -   memset(vs, 0, sizeof(*vs));
> > -
> > -   /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_VS_PROG_DATA */
> > -   vs->thread0.grf_reg_count = ALIGN(vue_prog_data->total_grf, 16) / 16 - 1;
> > -   vs->thread0.kernel_start_pointer =
> > -      brw_program_reloc(brw,
> > -			stage_state->state_offset +
> > -			offsetof(struct brw_vs_unit_state, thread0),
> > -			stage_state->prog_offset +
> > -			(vs->thread0.grf_reg_count << 1)) >> 6;
> > -
> > -   if (prog_data->use_alt_mode)
> > -      vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
> > -   else
> > -      vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
> > -
> > -   /* Choosing multiple program flow means that we may get 2-vertex threads,
> > -    * which will have the channel mask for dwords 4-7 enabled in the thread,
> > -    * and those dwords will be written to the second URB handle when we
> > -    * brw_urb_WRITE() results.
> > -    */
> > -   /* Force single program flow on Ironlake.  We cannot reliably get
> > -    * all applications working without it.  See:
> > -    * https://bugs.freedesktop.org/show_bug.cgi?id=29172
> > -    *
> > -    * The most notable and reliably failing application is the Humus
> > -    * demo "CelShading"
> > -   */
> > -   vs->thread1.single_program_flow = (brw->gen == 5);
> > -
> > -   vs->thread1.binding_table_entry_count =
> > -      prog_data->binding_table.size_bytes / 4;
> > -
> > -   if (prog_data->total_scratch != 0) {
> > -      vs->thread2.scratch_space_base_pointer =
> > -	 stage_state->scratch_bo->offset64 >> 10; /* reloc */
> > -      vs->thread2.per_thread_scratch_space =
> > -	 ffs(stage_state->per_thread_scratch) - 11;
> > -   } else {
> > -      vs->thread2.scratch_space_base_pointer = 0;
> > -      vs->thread2.per_thread_scratch_space = 0;
> > -   }
> > -
> > -   vs->thread3.urb_entry_read_length = vue_prog_data->urb_read_length;
> > -   vs->thread3.const_urb_entry_read_length = prog_data->curb_read_length;
> > -   vs->thread3.dispatch_grf_start_reg = prog_data->dispatch_grf_start_reg;
> > -   vs->thread3.urb_entry_read_offset = 0;
> > -
> > -   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
> > -   vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
> > -
> > -   /* BRW_NEW_URB_FENCE */
> > -   if (brw->gen == 5) {
> > -      switch (brw->urb.nr_vs_entries) {
> > -      case 8:
> > -      case 12:
> > -      case 16:
> > -      case 32:
> > -      case 64:
> > -      case 96:
> > -      case 128:
> > -      case 168:
> > -      case 192:
> > -      case 224:
> > -      case 256:
> > -	 vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
> > -	 break;
> > -      default:
> > -         unreachable("not reached");
> > -      }
> > -   } else {
> > -      switch (brw->urb.nr_vs_entries) {
> > -      case 8:
> > -      case 12:
> > -      case 16:
> > -      case 32:
> > -	 break;
> > -      case 64:
> > -	 assert(brw->is_g4x);
> > -	 break;
> > -      default:
> > -         unreachable("not reached");
> > -      }
> > -      vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
> > -   }
> > -
> > -   vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
> > -
> > -   vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
> > -				   1, devinfo->max_vs_threads) - 1;
> > -
> > -   if (brw->gen == 5)
> > -      vs->vs5.sampler_count = 0; /* hardware requirement */
> > -   else {
> > -      vs->vs5.sampler_count = (stage_state->sampler_count + 3) / 4;
> > -   }
> > -
> > -   /* Vertex program always enabled:
> > -    */
> > -   vs->vs6.vs_enable = 1;
> > -
> > -   /* Set the sampler state pointer, and its reloc
> > -    */
> > -   if (stage_state->sampler_count) {
> > -      /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
> > -      vs->vs5.sampler_state_pointer =
> > -         (brw->batch.bo->offset64 + stage_state->sampler_offset) >> 5;
> > -      brw_emit_reloc(&brw->batch,
> > -                     stage_state->state_offset +
> > -                     offsetof(struct brw_vs_unit_state, vs5),
> > -                     brw->batch.bo,
> > -                     (stage_state->sampler_offset | vs->vs5.sampler_count),
> > -                     I915_GEM_DOMAIN_INSTRUCTION, 0);
> > -   }
> > -
> > -   /* Emit scratch space relocation */
> > -   if (prog_data->total_scratch != 0) {
> > -      brw_emit_reloc(&brw->batch,
> > -                     stage_state->state_offset +
> > -                     offsetof(struct brw_vs_unit_state, thread2),
> > -                     stage_state->scratch_bo,
> > -                     vs->thread2.per_thread_scratch_space,
> > -                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
> > -   }
> > -
> > -   brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
> > -}
> > -
> > -const struct brw_tracked_state brw_vs_unit = {
> > -   .dirty = {
> > -      .mesa  = 0,
> > -      .brw   = BRW_NEW_BATCH |
> 
> If I understood correctly, BRW_NEW_BATCH will get covered by BRW_NEW_CONTEXT
> when needed, right?
> 
> > -               BRW_NEW_BLORP |
> > -               BRW_NEW_PROGRAM_CACHE |
> > -               BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> > -               BRW_NEW_SAMPLER_STATE_TABLE |
> > -               BRW_NEW_URB_FENCE |
> > -               BRW_NEW_VS_PROG_DATA,
> > -   },
> > -   .emit = brw_upload_vs_unit,
> > -};
> > diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> > index 4c6cb1a9b71..bc7f068d5bb 100644
> > --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> > +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> > @@ -130,12 +130,23 @@ instruction_bo(struct brw_bo *bo, uint32_t offset)
> >              .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> >              .write_domain = I915_GEM_DOMAIN_INSTRUCTION,
> >     };
> >  }
> >  
> >  static inline struct brw_address
> > +instruction_ro_bo(struct brw_bo *bo, uint32_t offset)
> > +{
> > +   return (struct brw_address) {
> > +            .bo = bo,
> > +            .offset = offset,
> > +            .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> > +            .write_domain = 0,
> > +   };
> > +}
> > +
> > +static inline struct brw_address
> >  vertex_bo(struct brw_bo *bo, uint32_t offset)
> >  {
> >     return (struct brw_address) {
> >              .bo = bo,
> >              .offset = offset,
> >              .read_domains = I915_GEM_DOMAIN_VERTEX,
> > @@ -1690,14 +1701,28 @@ static const struct brw_tracked_state genX(wm_state) = {
> >     .emit = genX(upload_wm),
> >  };
> >  #endif
> >  
> >  /* ---------------------------------------------------------------------- */
> >  
> > +#if GEN_GEN == 4
> > +static inline struct brw_address
> > +KSP(struct brw_context *brw, uint32_t offset)
> > +{
> > +   return instruction_bo(brw->cache.bo, offset);
> > +}
> > +#else
> > +static inline uint32_t
> > +KSP(struct brw_context *brw, uint32_t offset)
> > +{
> > +   return offset;
> > +}
> > +#endif
> > +
> >  #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
> > -   pkt.KernelStartPointer = stage_state->prog_offset;                     \
> > +   pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset);           \
> >     pkt.SamplerCount       =                                               \
> >        DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);          \
> >     pkt.BindingTableEntryCount =                                           \
> >        stage_prog_data->binding_table.size_bytes / 4;                      \
> >     pkt.FloatingPointMode  = stage_prog_data->use_alt_mode;                \
> >                                                                            \
> > @@ -1713,18 +1738,18 @@ static const struct brw_tracked_state genX(wm_state) = {
> >     pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;       \
> >     pkt.prefix##URBEntryReadOffset = 0;                                    \
> >                                                                            \
> >     pkt.StatisticsEnable = true;                                           \
> >     pkt.Enable           = true;
> >  
> > -#if GEN_GEN >= 6
> >  static void
> >  genX(upload_vs_state)(struct brw_context *brw)
> >  {
> > +   UNUSED struct gl_context *ctx = &brw->ctx;
> >     const struct gen_device_info *devinfo = &brw->screen->devinfo;
> > -   const struct brw_stage_state *stage_state = &brw->vs.base;
> > +   struct brw_stage_state *stage_state = &brw->vs.base;
> >  
> >     /* BRW_NEW_VS_PROG_DATA */
> >     const struct brw_vue_prog_data *vue_prog_data =
> >        brw_vue_prog_data(brw->vs.base.prog_data);
> >     const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base;
> >  
> > @@ -1752,17 +1777,50 @@ genX(upload_vs_state)(struct brw_context *brw)
> >     }
> >  #endif
> >  
> >     if (GEN_GEN == 7 && devinfo->is_ivybridge)
> >        gen7_emit_vs_workaround_flush(brw);
> >  
> > +#if GEN_GEN >= 6
> >     brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
> > +#else
> > +   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
> > +   brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) {
> > +#endif
> >        INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
> >  
> >        vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
> >  
> > +#if GEN_GEN < 6
> > +      vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
> > +      vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length;
> > +      vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2;
> > +
> > +      vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 : 0);
> 
> I was looking and it seems that NumberofURBEntries should be bits 11:17, while
> bit 18 should be MBZ. It shouldn't matter much here, just the checks that
> genX_pack.h do. Also it's my fault, since I submitted the xml without checking
> everything, so I can send a patch to fix that if you prefer.
> 
> Anyway, this patch is:
> Reviewed-by: Rafael Antognolli <rafael.antognolli at intel.com>
> 
> > +      vs.URBEntryAllocationSize = brw->urb.vsize - 1;
> > +
> > +      vs.MaximumNumberofThreads =
> > +         CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1;
> > +
> > +      vs.StatisticsEnable = false;
> > +      vs.SamplerStateOffset =
> > +         instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
> > +#endif
> > +
> > +#if GEN_GEN == 5
> > +      /* Force single program flow on Ironlake.  We cannot reliably get
> > +       * all applications working without it.  See:
> > +       * https://bugs.freedesktop.org/show_bug.cgi?id=29172
> > +       *
> > +       * The most notable and reliably failing application is the Humus
> > +       * demo "CelShading"
> > +       */
> > +      vs.SingleProgramFlow = true;
> > +      vs.SamplerCount = 0; /* hardware requirement */
> > +#endif
> > +
> >  #if GEN_GEN >= 8
> >        vs.SIMD8DispatchEnable =
> >           vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
> >  
> >        vs.UserClipDistanceCullTestEnableBitmask =
> >           vue_prog_data->cull_distance_mask;
> > @@ -1798,17 +1856,21 @@ static const struct brw_tracked_state genX(vs_state) = {
> >     .dirty = {
> >        .mesa  = (GEN_GEN == 6 ? (_NEW_PROGRAM_CONSTANTS | _NEW_TRANSFORM) : 0),
> >        .brw   = BRW_NEW_BATCH |
> >                 BRW_NEW_BLORP |
> >                 BRW_NEW_CONTEXT |
> >                 BRW_NEW_VS_PROG_DATA |
> > -               (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0),
> > +               (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) |
> > +               (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> > +                               BRW_NEW_PROGRAM_CACHE |
> > +                               BRW_NEW_SAMPLER_STATE_TABLE |
> > +                               BRW_NEW_URB_FENCE
> > +                             : 0),
> >     },
> >     .emit = genX(upload_vs_state),
> >  };
> > -#endif
> >  
> >  /* ---------------------------------------------------------------------- */
> >  
> >  #if GEN_GEN >= 6
> >  static void
> >  brw_calculate_guardband_size(const struct gen_device_info *devinfo,
> > @@ -3881,13 +3943,13 @@ genX(init_atoms)(struct brw_context *brw)
> >        &brw_vs_samplers,
> >  
> >        /* These set up state for brw_psp_urb_cbs */
> >        &brw_wm_unit,
> >        &brw_sf_vp,
> >        &brw_sf_unit,
> > -      &brw_vs_unit,		/* always required, enabled or not */
> > +      &genX(vs_state), /* always required, enabled or not */
> >        &brw_clip_unit,
> >        &brw_gs_unit,
> >  
> >        /* Command packets:
> >         */
> >        &brw_invariant_state,
> > -- 
> > 2.12.2
> > 
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list