[Mesa-dev] [PATCH v2] i965: Convert CLIP_STATE to genxml.

Kenneth Graunke kenneth at whitecape.org
Fri Jul 14 18:55:55 UTC 2017


On Friday, July 14, 2017 11:02:57 AM PDT you wrote:
> Add the code into its own function and atom, since almost nothing is
> shared with GEN >= 6.
> 
> v2: Split GEN <=5 and GEN >= 6 into separate functions (Ken).
> 
> Signed-off-by: Rafael Antognolli <rafael.antognolli at intel.com>
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/Makefile.sources    |   1 -
>  src/mesa/drivers/dri/i965/brw_clip_state.c    | 147 --------------------------
>  src/mesa/drivers/dri/i965/brw_structs.h       |  65 ------------
>  src/mesa/drivers/dri/i965/genX_state_upload.c | 103 +++++++++++++++++-
>  4 files changed, 101 insertions(+), 215 deletions(-)
>  delete mode 100644 src/mesa/drivers/dri/i965/brw_clip_state.c
> 
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index fc46645..60a41f8 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -6,7 +6,6 @@ i965_FILES = \
>  	brw_bufmgr.h \
>  	brw_clear.c \
>  	brw_clip.c \
> -	brw_clip_state.c \
>  	brw_compute.c \
>  	brw_conditional_render.c \
>  	brw_context.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
> deleted file mode 100644
> index 8f22c0f..0000000
> --- a/src/mesa/drivers/dri/i965/brw_clip_state.c
> +++ /dev/null
> @@ -1,147 +0,0 @@
> -/*
> - Copyright (C) Intel Corp.  2006.  All Rights Reserved.
> - Intel funded Tungsten Graphics to
> - develop this 3D driver.
> -
> - Permission is hereby granted, free of charge, to any person obtaining
> - a copy of this software and associated documentation files (the
> - "Software"), to deal in the Software without restriction, including
> - without limitation the rights to use, copy, modify, merge, publish,
> - distribute, sublicense, and/or sell copies of the Software, and to
> - permit persons to whom the Software is furnished to do so, subject to
> - the following conditions:
> -
> - The above copyright notice and this permission notice (including the
> - next paragraph) shall be included in all copies or substantial
> - portions of the Software.
> -
> - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
> - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
> - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
> - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> -
> - **********************************************************************/
> - /*
> -  * Authors:
> -  *   Keith Whitwell <keithw at vmware.com>
> -  */
> -
> -#include "intel_batchbuffer.h"
> -#include "brw_context.h"
> -#include "brw_state.h"
> -#include "brw_defines.h"
> -#include "main/framebuffer.h"
> -
> -static void
> -brw_upload_clip_unit(struct brw_context *brw)
> -{
> -   struct gl_context *ctx = &brw->ctx;
> -   struct brw_clip_unit_state *clip;
> -
> -   clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
> -   memset(clip, 0, sizeof(*clip));
> -
> -   /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_CLIP_PROG_DATA */
> -   clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
> -				 16 - 1);
> -   clip->thread0.kernel_start_pointer =
> -      brw_program_reloc(brw,
> -			brw->clip.state_offset +
> -			offsetof(struct brw_clip_unit_state, thread0),
> -			brw->clip.prog_offset +
> -			(clip->thread0.grf_reg_count << 1)) >> 6;
> -
> -   clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
> -   clip->thread1.single_program_flow = 1;
> -
> -   clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
> -   clip->thread3.const_urb_entry_read_length =
> -      brw->clip.prog_data->curb_read_length;
> -
> -   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
> -   clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
> -   clip->thread3.dispatch_grf_start_reg = 1;
> -   clip->thread3.urb_entry_read_offset = 0;
> -
> -   /* BRW_NEW_URB_FENCE */
> -   clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
> -   clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
> -   /* If we have enough clip URB entries to run two threads, do so.
> -    */
> -   if (brw->urb.nr_clip_entries >= 10) {
> -      /* Half of the URB entries go to each thread, and it has to be an
> -       * even number.
> -       */
> -      assert(brw->urb.nr_clip_entries % 2 == 0);
> -
> -      /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
> -       * only 2 threads can output VUEs at a time.
> -       */
> -      if (brw->gen == 5)
> -         clip->thread4.max_threads = 16 - 1;
> -      else
> -         clip->thread4.max_threads = 2 - 1;
> -   } else {
> -      assert(brw->urb.nr_clip_entries >= 5);
> -      clip->thread4.max_threads = 1 - 1;
> -   }
> -
> -   /* _NEW_TRANSFORM */
> -   if (brw->gen == 5 || brw->is_g4x)
> -      clip->clip5.userclip_enable_flags = ctx->Transform.ClipPlanesEnabled;
> -   else
> -      /* Up to 6 actual clip flags, plus the 7th for negative RHW workaround. */
> -      clip->clip5.userclip_enable_flags = (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40;
> -
> -   clip->clip5.userclip_must_clip = 1;
> -
> -   /* enable guardband clipping if we can */
> -   clip->clip5.guard_band_enable = 1;
> -   clip->clip6.clipper_viewport_state_ptr =
> -      (brw->batch.bo->offset64 + brw->clip.vp_offset) >> 5;
> -
> -   /* emit clip viewport relocation */
> -   brw_emit_reloc(&brw->batch,
> -                  (brw->clip.state_offset +
> -                   offsetof(struct brw_clip_unit_state, clip6)),
> -                  brw->batch.bo, brw->clip.vp_offset,
> -                  I915_GEM_DOMAIN_INSTRUCTION, 0);
> -
> -   /* _NEW_TRANSFORM */
> -   if (!ctx->Transform.DepthClamp)
> -      clip->clip5.viewport_z_clip_enable = 1;
> -   clip->clip5.viewport_xy_clip_enable = 1;
> -   clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
> -   if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
> -      clip->clip5.api_mode = BRW_CLIP_API_DX;
> -   else
> -      clip->clip5.api_mode = BRW_CLIP_API_OGL;
> -   clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
> -
> -   if (brw->is_g4x)
> -      clip->clip5.negative_w_clip_test = 1;
> -
> -   clip->viewport_xmin = -1;
> -   clip->viewport_xmax = 1;
> -   clip->viewport_ymin = -1;
> -   clip->viewport_ymax = 1;
> -
> -   brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
> -}
> -
> -const struct brw_tracked_state brw_clip_unit = {
> -   .dirty = {
> -      .mesa  = _NEW_TRANSFORM |
> -               _NEW_VIEWPORT,
> -      .brw   = BRW_NEW_BATCH |
> -               BRW_NEW_BLORP |
> -               BRW_NEW_CLIP_PROG_DATA |
> -               BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> -               BRW_NEW_PROGRAM_CACHE |
> -               BRW_NEW_URB_FENCE,
> -   },
> -   .emit = brw_upload_clip_unit,
> -};
> diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
> index 6feab0d..5a0d91d 100644
> --- a/src/mesa/drivers/dri/i965/brw_structs.h
> +++ b/src/mesa/drivers/dri/i965/brw_structs.h
> @@ -115,71 +115,6 @@ struct thread3
>     unsigned pad3:1;
>  };
>  
> -
> -
> -struct brw_clip_unit_state
> -{
> -   struct thread0 thread0;
> -   struct
> -   {
> -      unsigned pad0:7;
> -      unsigned sw_exception_enable:1;
> -      unsigned pad1:3;
> -      unsigned mask_stack_exception_enable:1;
> -      unsigned pad2:1;
> -      unsigned illegal_op_exception_enable:1;
> -      unsigned pad3:2;
> -      unsigned floating_point_mode:1;
> -      unsigned thread_priority:1;
> -      unsigned binding_table_entry_count:8;
> -      unsigned pad4:5;
> -      unsigned single_program_flow:1;
> -   } thread1;
> -
> -   struct thread2 thread2;
> -   struct thread3 thread3;
> -
> -   struct
> -   {
> -      unsigned pad0:9;
> -      unsigned gs_output_stats:1; /* not always */
> -      unsigned stats_enable:1;
> -      unsigned nr_urb_entries:7;
> -      unsigned pad1:1;
> -      unsigned urb_entry_allocation_size:5;
> -      unsigned pad2:1;
> -      unsigned max_threads:5; 	/* may be less */
> -      unsigned pad3:2;
> -   } thread4;
> -
> -   struct
> -   {
> -      unsigned pad0:13;
> -      unsigned clip_mode:3;
> -      unsigned userclip_enable_flags:8;
> -      unsigned userclip_must_clip:1;
> -      unsigned negative_w_clip_test:1;
> -      unsigned guard_band_enable:1;
> -      unsigned viewport_z_clip_enable:1;
> -      unsigned viewport_xy_clip_enable:1;
> -      unsigned vertex_position_space:1;
> -      unsigned api_mode:1;
> -      unsigned pad2:1;
> -   } clip5;
> -
> -   struct
> -   {
> -      unsigned pad0:5;
> -      unsigned clipper_viewport_state_ptr:27;
> -   } clip6;
> -
> -
> -   float viewport_xmin;
> -   float viewport_xmax;
> -   float viewport_ymin;
> -   float viewport_ymax;
> -};
> -
>  struct brw_wm_unit_state
>  {
>     struct thread0 thread0;
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 54a547c..b71ca46 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -1268,7 +1268,106 @@ static const struct brw_tracked_state genX(depth_stencil_state) = {
>  
>  /* ---------------------------------------------------------------------- */
>  
> -#if GEN_GEN >= 6
> +#if GEN_GEN <= 5
> +
> +static void
> +genX(upload_clip_state)(struct brw_context *brw)
> +{
> +   struct gl_context *ctx = &brw->ctx;
> +
> +   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
> +   brw_state_emit(brw, GENX(CLIP_STATE), 32, &brw->clip.state_offset, clip) {
> +      clip.KernelStartPointer = KSP_ro(brw, brw->clip.prog_offset);
> +      clip.GRFRegisterCount =
> +         DIV_ROUND_UP(brw->clip.prog_data->total_grf, 16) - 1;
> +      clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
> +      clip.SingleProgramFlow = true;
> +      clip.VertexURBEntryReadLength = brw->clip.prog_data->urb_read_length;
> +      clip.ConstantURBEntryReadLength = brw->clip.prog_data->curb_read_length;
> +
> +      /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
> +      clip.ConstantURBEntryReadOffset = brw->curbe.clip_start * 2;
> +      clip.DispatchGRFStartRegisterForURBData = 1;
> +      clip.VertexURBEntryReadOffset = 0;
> +
> +      /* BRW_NEW_URB_FENCE */
> +      clip.NumberofURBEntries = brw->urb.nr_clip_entries;
> +      clip.URBEntryAllocationSize = brw->urb.vsize - 1;
> +
> +      if (brw->urb.nr_clip_entries >= 10) {
> +         /* Half of the URB entries go to each thread, and it has to be an
> +          * even number.
> +          */
> +         assert(brw->urb.nr_clip_entries % 2 == 0);
> +
> +         /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
> +          * only 2 threads can output VUEs at a time.
> +          */
> +         clip.MaximumNumberofThreads = GEN_GEN == 5 ? 16 - 1 : 2 - 1;

Could we write this as:

         clip.MaximumNumberofThreads = (GEN_GEN == 5 ? 16 : 2) - 1;

I think it might be a little easier to read - 16 threads on Gen5,
2 threads on Gen4/G45, and minus 1 because of the encoding.

> +      } else {
> +         assert(brw->urb.nr_clip_entries >= 5);
> +         clip.MaximumNumberofThreads = 1 - 1;
> +      }
> +
> +      clip.VertexPositionSpace = VPOS_NDCSPACE;
> +      clip.UserClipFlagsMustClipEnable = true;
> +      clip.GuardbandClipTestEnable = true;
> +
> +      clip.ClipperViewportStatePointer =
> +         instruction_ro_bo(brw->batch.bo, brw->clip.vp_offset);
> +
> +      clip.ScreenSpaceViewportXMin = -1;
> +      clip.ScreenSpaceViewportXMax = 1;
> +      clip.ScreenSpaceViewportYMin = -1;
> +      clip.ScreenSpaceViewportYMax = 1;
> +
> +      clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp;
> +
> +      /* _NEW_TRANSFORM */
> +      if (GEN_GEN == 5 || GEN_IS_G4X) {
> +         clip.UserClipDistanceClipTestEnableBitmask =
> +            ctx->Transform.ClipPlanesEnabled;
> +      } else {

Let's keep the comment - mentioning negative RHW is useful so people know
why we're doing this crazy thing.

         /* Up to 6 actual clip flags, plus the 7th for the negative RHW
          * workaround.
          */

> +         clip.UserClipDistanceClipTestEnableBitmask =
> +            (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40;
> +      }
> +
> +      if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
> +         clip.APIMode = APIMODE_D3D;
> +      else
> +         clip.APIMode = APIMODE_OGL;
> +
> +      clip.GuardbandClipTestEnable = true;
> +
> +      clip.ClipMode = brw->clip.prog_data->clip_mode;
> +
> +#if GEN_IS_G4X
> +      clip.NegativeWClipTestEnable = true;
> +#endif
> +
> +      /* _NEW_POLYGON,
> +       * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
> +       */

Looks like this comment got imported from future gens...setting something
to "true" definitely doesn't have any state dependencies...and we don't
have tessellation shaders on Gen4-5...nor even real GS enabled.  Let's
just drop the comment.

Thanks, this looks great!

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

> +      clip.ViewportXYClipTestEnable = true;
> +   }
> +}
> +
> +const struct brw_tracked_state genX(clip_state) = {
> +   .dirty = {
> +      .mesa  = _NEW_TRANSFORM |
> +               _NEW_VIEWPORT,
> +      .brw   = BRW_NEW_BATCH |
> +               BRW_NEW_BLORP |
> +               BRW_NEW_CLIP_PROG_DATA |
> +               BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> +               BRW_NEW_PROGRAM_CACHE |
> +               BRW_NEW_URB_FENCE,
> +   },
> +   .emit = genX(upload_clip_state),
> +};
> +
> +#else
> +
>  static void
>  genX(upload_clip_state)(struct brw_context *brw)
>  {
> @@ -5123,7 +5222,7 @@ genX(init_atoms)(struct brw_context *brw)
>        &genX(sf_clip_viewport),
>        &genX(sf_state),
>        &genX(vs_state), /* always required, enabled or not */
> -      &brw_clip_unit,
> +      &genX(clip_state),
>        &genX(gs_state),
>  
>        /* Command packets:
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170714/2d850e31/attachment.sig>


More information about the mesa-dev mailing list