[Mesa-dev] [PATCH 18/18] i965: Convert WM_STATE to genxml on gen4-5.

Kristian Høgsberg hoegsberg at gmail.com
Mon Jun 19 19:09:00 UTC 2017


On Mon, Jun 19, 2017 at 11:17 AM, Rafael Antognolli
<rafael.antognolli at intel.com> wrote:
> On Mon, Jun 19, 2017 at 09:46:30AM -0700, Kristian Høgsberg wrote:
>> On Fri, Jun 16, 2017 at 4:31 PM, Rafael Antognolli
>> <rafael.antognolli at intel.com> wrote:
>> > The code doesn't exactly get a lot simpler, but at least it is in a single
>> > place, and we delete more than we add.
>>
>> Another good point is that you get rid of struct brw_wm_unit_state
>> which was a third mechanism for encoding GEN state. We used to have
>> GENXML, manual packing and these bitfield structs. Now we're down to
>> just GENXML and some manual packing.
>
> Nice, I think I can add this to the commit message if you don't mind :)

Please do, that's why I brought it up ;-)

>> Kristian
>>
>> >
>> > Signed-off-by: Rafael Antognolli <rafael.antognolli at intel.com>
>> > ---
>> >  src/mesa/drivers/dri/i965/Makefile.sources    |   1 -
>> >  src/mesa/drivers/dri/i965/brw_state.h         |   1 -
>> >  src/mesa/drivers/dri/i965/brw_structs.h       | 121 ------------
>> >  src/mesa/drivers/dri/i965/brw_wm.h            |   2 -
>> >  src/mesa/drivers/dri/i965/brw_wm_state.c      | 274 --------------------------
>> >  src/mesa/drivers/dri/i965/genX_state_upload.c | 191 ++++++++++++++----
>> >  6 files changed, 153 insertions(+), 437 deletions(-)
>> >  delete mode 100644 src/mesa/drivers/dri/i965/brw_wm_state.c
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
>> > index 89be92e..c15b3ef 100644
>> > --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> > +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> > @@ -61,7 +61,6 @@ i965_FILES = \
>> >         brw_vs_surface_state.c \
>> >         brw_wm.c \
>> >         brw_wm.h \
>> > -       brw_wm_state.c \
>> >         brw_wm_surface_state.c \
>> >         gen4_blorp_exec.h \
>> >         gen6_clip_state.c \
>> > diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
>> > index 8f3bd7f..9588a51 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_state.h
>> > +++ b/src/mesa/drivers/dri/i965/brw_state.h
>> > @@ -89,7 +89,6 @@ extern const struct brw_tracked_state brw_wm_image_surfaces;
>> >  extern const struct brw_tracked_state brw_cs_ubo_surfaces;
>> >  extern const struct brw_tracked_state brw_cs_abo_surfaces;
>> >  extern const struct brw_tracked_state brw_cs_image_surfaces;
>> > -extern const struct brw_tracked_state brw_wm_unit;
>> >
>> >  extern const struct brw_tracked_state brw_psp_urb_cbs;
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
>> > index 5a0d91d..fb592be 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_structs.h
>> > +++ b/src/mesa/drivers/dri/i965/brw_structs.h
>> > @@ -65,127 +65,6 @@ struct brw_urb_fence
>> >     } bits1;
>> >  };
>> >
>> > -/* State structs for the various fixed function units:
>> > - */
>> > -
>> > -
>> > -struct thread0
>> > -{
>> > -   unsigned pad0:1;
>> > -   unsigned grf_reg_count:3;
>> > -   unsigned pad1:2;
>> > -   unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
>> > -};
>> > -
>> > -struct thread1
>> > -{
>> > -   unsigned ext_halt_exception_enable:1;
>> > -   unsigned sw_exception_enable:1;
>> > -   unsigned mask_stack_exception_enable:1;
>> > -   unsigned timeout_exception_enable:1;
>> > -   unsigned illegal_op_exception_enable:1;
>> > -   unsigned pad0:3;
>> > -   unsigned depth_coef_urb_read_offset:6;      /* WM only */
>> > -   unsigned pad1:2;
>> > -   unsigned floating_point_mode:1;
>> > -   unsigned thread_priority:1;
>> > -   unsigned binding_table_entry_count:8;
>> > -   unsigned pad3:5;
>> > -   unsigned single_program_flow:1;
>> > -};
>> > -
>> > -struct thread2
>> > -{
>> > -   unsigned per_thread_scratch_space:4;
>> > -   unsigned pad0:6;
>> > -   unsigned scratch_space_base_pointer:22;
>> > -};
>> > -
>> > -
>> > -struct thread3
>> > -{
>> > -   unsigned dispatch_grf_start_reg:4;
>> > -   unsigned urb_entry_read_offset:6;
>> > -   unsigned pad0:1;
>> > -   unsigned urb_entry_read_length:6;
>> > -   unsigned pad1:1;
>> > -   unsigned const_urb_entry_read_offset:6;
>> > -   unsigned pad2:1;
>> > -   unsigned const_urb_entry_read_length:6;
>> > -   unsigned pad3:1;
>> > -};
>> > -
>> > -struct brw_wm_unit_state
>> > -{
>> > -   struct thread0 thread0;
>> > -   struct thread1 thread1;
>> > -   struct thread2 thread2;
>> > -   struct thread3 thread3;
>> > -
>> > -   struct {
>> > -      unsigned stats_enable:1;
>> > -      unsigned depth_buffer_clear:1;
>> > -      unsigned sampler_count:3;
>> > -      unsigned sampler_state_pointer:27;
>> > -   } wm4;
>> > -
>> > -   struct
>> > -   {
>> > -      unsigned enable_8_pix:1;
>> > -      unsigned enable_16_pix:1;
>> > -      unsigned enable_32_pix:1;
>> > -      unsigned enable_con_32_pix:1;
>> > -      unsigned enable_con_64_pix:1;
>> > -      unsigned pad0:1;
>> > -
>> > -      /* These next four bits are for Ironlake+ */
>> > -      unsigned fast_span_coverage_enable:1;
>> > -      unsigned depth_buffer_clear:1;
>> > -      unsigned depth_buffer_resolve_enable:1;
>> > -      unsigned hierarchical_depth_buffer_resolve_enable:1;
>> > -
>> > -      unsigned legacy_global_depth_bias:1;
>> > -      unsigned line_stipple:1;
>> > -      unsigned depth_offset:1;
>> > -      unsigned polygon_stipple:1;
>> > -      unsigned line_aa_region_width:2;
>> > -      unsigned line_endcap_aa_region_width:2;
>> > -      unsigned early_depth_test:1;
>> > -      unsigned thread_dispatch_enable:1;
>> > -      unsigned program_uses_depth:1;
>> > -      unsigned program_computes_depth:1;
>> > -      unsigned program_uses_killpixel:1;
>> > -      unsigned legacy_line_rast: 1;
>> > -      unsigned transposed_urb_read_enable:1;
>> > -      unsigned max_threads:7;
>> > -   } wm5;
>> > -
>> > -   float global_depth_offset_constant;
>> > -   float global_depth_offset_scale;
>> > -
>> > -   /* for Ironlake only */
>> > -   struct {
>> > -      unsigned pad0:1;
>> > -      unsigned grf_reg_count_1:3;
>> > -      unsigned pad1:2;
>> > -      unsigned kernel_start_pointer_1:26;
>> > -   } wm8;
>> > -
>> > -   struct {
>> > -      unsigned pad0:1;
>> > -      unsigned grf_reg_count_2:3;
>> > -      unsigned pad1:2;
>> > -      unsigned kernel_start_pointer_2:26;
>> > -   } wm9;
>> > -
>> > -   struct {
>> > -      unsigned pad0:1;
>> > -      unsigned grf_reg_count_3:3;
>> > -      unsigned pad1:2;
>> > -      unsigned kernel_start_pointer_3:26;
>> > -   } wm10;
>> > -};
>> > -
>> >  struct gen5_sampler_default_color {
>> >     uint8_t ub[4];
>> >     float f[4];
>> > diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
>> > index 613172a..113cdf3 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_wm.h
>> > +++ b/src/mesa/drivers/dri/i965/brw_wm.h
>> > @@ -41,8 +41,6 @@
>> >  extern "C" {
>> >  #endif
>> >
>> > -bool brw_color_buffer_write_enabled(struct brw_context *brw);
>> > -
>> >  void
>> >  brw_upload_wm_prog(struct brw_context *brw);
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
>> > deleted file mode 100644
>> > index 69bbeb2..0000000
>> > --- a/src/mesa/drivers/dri/i965/brw_wm_state.c
>> > +++ /dev/null
>> > @@ -1,274 +0,0 @@
>> > -/*
>> > - Copyright (C) Intel Corp.  2006.  All Rights Reserved.
>> > - Intel funded Tungsten Graphics to
>> > - develop this 3D driver.
>> > -
>> > - Permission is hereby granted, free of charge, to any person obtaining
>> > - a copy of this software and associated documentation files (the
>> > - "Software"), to deal in the Software without restriction, including
>> > - without limitation the rights to use, copy, modify, merge, publish,
>> > - distribute, sublicense, and/or sell copies of the Software, and to
>> > - permit persons to whom the Software is furnished to do so, subject to
>> > - the following conditions:
>> > -
>> > - The above copyright notice and this permission notice (including the
>> > - next paragraph) shall be included in all copies or substantial
>> > - portions of the Software.
>> > -
>> > - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> > - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> > - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
>> > - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
>> > - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
>> > - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
>> > - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>> > -
>> > - **********************************************************************/
>> > - /*
>> > -  * Authors:
>> > -  *   Keith Whitwell <keithw at vmware.com>
>> > -  */
>> > -
>> > -
>> > -
>> > -#include "intel_batchbuffer.h"
>> > -#include "intel_fbo.h"
>> > -#include "brw_context.h"
>> > -#include "brw_state.h"
>> > -#include "brw_defines.h"
>> > -#include "brw_wm.h"
>> > -#include "compiler/nir/nir.h"
>> > -
>> > -/***********************************************************************
>> > - * WM unit - fragment programs and rasterization
>> > - */
>> > -
>> > -bool
>> > -brw_color_buffer_write_enabled(struct brw_context *brw)
>> > -{
>> > -   struct gl_context *ctx = &brw->ctx;
>> > -   /* BRW_NEW_FRAGMENT_PROGRAM */
>> > -   const struct gl_program *fp = brw->fragment_program;
>> > -   unsigned i;
>> > -
>> > -   /* _NEW_BUFFERS */
>> > -   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
>> > -      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
>> > -      uint64_t outputs_written = fp->info.outputs_written;
>> > -
>> > -      /* _NEW_COLOR */
>> > -      if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
>> > -                outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
>> > -         (ctx->Color.ColorMask[i][0] ||
>> > -          ctx->Color.ColorMask[i][1] ||
>> > -          ctx->Color.ColorMask[i][2] ||
>> > -          ctx->Color.ColorMask[i][3])) {
>> > -        return true;
>> > -      }
>> > -   }
>> > -
>> > -   return false;
>> > -}
>> > -
>> > -/**
>> > - * Setup wm hardware state.  See page 225 of Volume 2
>> > - */
>> > -static void
>> > -brw_upload_wm_unit(struct brw_context *brw)
>> > -{
>> > -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> > -   struct gl_context *ctx = &brw->ctx;
>> > -   /* BRW_NEW_FRAGMENT_PROGRAM */
>> > -   const struct gl_program *fp = brw->fragment_program;
>> > -   /* BRW_NEW_FS_PROG_DATA */
>> > -   const struct brw_wm_prog_data *prog_data =
>> > -      brw_wm_prog_data(brw->wm.base.prog_data);
>> > -   struct brw_wm_unit_state *wm;
>> > -
>> > -   wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.base.state_offset);
>> > -   memset(wm, 0, sizeof(*wm));
>> > -
>> > -   if (prog_data->dispatch_8 && prog_data->dispatch_16) {
>> > -      /* These two fields should be the same pre-gen6, which is why we
>> > -       * only have one hardware field to program for both dispatch
>> > -       * widths.
>> > -       */
>> > -      assert(prog_data->base.dispatch_grf_start_reg ==
>> > -            prog_data->dispatch_grf_start_reg_2);
>> > -   }
>> > -
>> > -   /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */
>> > -   wm->wm5.enable_8_pix = prog_data->dispatch_8;
>> > -   wm->wm5.enable_16_pix = prog_data->dispatch_16;
>> > -
>> > -   if (prog_data->dispatch_8 || prog_data->dispatch_16) {
>> > -      wm->thread0.grf_reg_count = prog_data->reg_blocks_0;
>> > -      wm->thread0.kernel_start_pointer =
>> > -         brw_program_reloc(brw,
>> > -                           brw->wm.base.state_offset +
>> > -                           offsetof(struct brw_wm_unit_state, thread0),
>> > -                           brw->wm.base.prog_offset +
>> > -                           (wm->thread0.grf_reg_count << 1)) >> 6;
>> > -   }
>> > -
>> > -   if (prog_data->prog_offset_2) {
>> > -      wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_2;
>> > -      wm->wm9.kernel_start_pointer_2 =
>> > -         brw_program_reloc(brw,
>> > -                           brw->wm.base.state_offset +
>> > -                           offsetof(struct brw_wm_unit_state, wm9),
>> > -                           brw->wm.base.prog_offset +
>> > -                           prog_data->prog_offset_2 +
>> > -                           (wm->wm9.grf_reg_count_2 << 1)) >> 6;
>> > -   }
>> > -
>> > -   wm->thread1.depth_coef_urb_read_offset = 1;
>> > -   if (prog_data->base.use_alt_mode)
>> > -      wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
>> > -   else
>> > -      wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
>> > -
>> > -   wm->thread1.binding_table_entry_count =
>> > -      prog_data->base.binding_table.size_bytes / 4;
>> > -
>> > -   if (prog_data->base.total_scratch != 0) {
>> > -      wm->thread2.scratch_space_base_pointer =
>> > -        brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */
>> > -      wm->thread2.per_thread_scratch_space =
>> > -        ffs(brw->wm.base.per_thread_scratch) - 11;
>> > -   } else {
>> > -      wm->thread2.scratch_space_base_pointer = 0;
>> > -      wm->thread2.per_thread_scratch_space = 0;
>> > -   }
>> > -
>> > -   wm->thread3.dispatch_grf_start_reg =
>> > -      prog_data->base.dispatch_grf_start_reg;
>> > -   wm->thread3.urb_entry_read_length =
>> > -      prog_data->num_varying_inputs * 2;
>> > -   wm->thread3.urb_entry_read_offset = 0;
>> > -   wm->thread3.const_urb_entry_read_length =
>> > -      prog_data->base.curb_read_length;
>> > -   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
>> > -   wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
>> > -
>> > -   if (brw->gen == 5)
>> > -      wm->wm4.sampler_count = 0; /* hardware requirement */
>> > -   else {
>> > -      wm->wm4.sampler_count = (brw->wm.base.sampler_count + 1) / 4;
>> > -   }
>> > -
>> > -   if (brw->wm.base.sampler_count) {
>> > -      /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
>> > -      wm->wm4.sampler_state_pointer = (brw->batch.bo->offset64 +
>> > -                                      brw->wm.base.sampler_offset) >> 5;
>> > -   } else {
>> > -      wm->wm4.sampler_state_pointer = 0;
>> > -   }
>> > -
>> > -   /* BRW_NEW_FRAGMENT_PROGRAM */
>> > -   wm->wm5.program_uses_depth = prog_data->uses_src_depth;
>> > -   wm->wm5.program_computes_depth = (fp->info.outputs_written &
>> > -                                    BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
>> > -   /* _NEW_BUFFERS
>> > -    * Override for NULL depthbuffer case, required by the Pixel Shader Computed
>> > -    * Depth field.
>> > -    */
>> > -   if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH))
>> > -      wm->wm5.program_computes_depth = 0;
>> > -
>> > -   /* _NEW_COLOR */
>> > -   wm->wm5.program_uses_killpixel =
>> > -      prog_data->uses_kill || ctx->Color.AlphaEnabled;
>> > -
>> > -   wm->wm5.max_threads = devinfo->max_wm_threads - 1;
>> > -
>> > -   /* _NEW_BUFFERS | _NEW_COLOR */
>> > -   if (brw_color_buffer_write_enabled(brw) ||
>> > -       wm->wm5.program_uses_killpixel ||
>> > -       wm->wm5.program_computes_depth) {
>> > -      wm->wm5.thread_dispatch_enable = 1;
>> > -   }
>> > -
>> > -   wm->wm5.legacy_line_rast = 0;
>> > -   wm->wm5.legacy_global_depth_bias = 0;
>> > -   wm->wm5.early_depth_test = 1;               /* never need to disable */
>> > -   wm->wm5.line_aa_region_width = 0;
>> > -   wm->wm5.line_endcap_aa_region_width = 1;
>> > -
>> > -   /* _NEW_POLYGONSTIPPLE */
>> > -   wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;
>> > -
>> > -   /* _NEW_POLYGON */
>> > -   if (ctx->Polygon.OffsetFill) {
>> > -      wm->wm5.depth_offset = 1;
>> > -      /* Something weird going on with legacy_global_depth_bias,
>> > -       * offset_constant, scaling and MRD.  This value passes glean
>> > -       * but gives some odd results elsewere (eg. the
>> > -       * quad-offset-units test).
>> > -       */
>> > -      wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2;
>> > -
>> > -      /* This is the only value that passes glean:
>> > -       */
>> > -      wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor;
>> > -   }
>> > -
>> > -   /* _NEW_LINE */
>> > -   wm->wm5.line_stipple = ctx->Line.StippleFlag;
>> > -
>> > -   /* BRW_NEW_STATS_WM */
>> > -   if (brw->stats_wm)
>> > -      wm->wm4.stats_enable = 1;
>> > -
>> > -   /* Emit scratch space relocation */
>> > -   if (prog_data->base.total_scratch != 0) {
>> > -      brw_emit_reloc(&brw->batch,
>> > -                     brw->wm.base.state_offset +
>> > -                     offsetof(struct brw_wm_unit_state, thread2),
>> > -                     brw->wm.base.scratch_bo,
>> > -                     wm->thread2.per_thread_scratch_space,
>> > -                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
>> > -   }
>> > -
>> > -   /* Emit sampler state relocation */
>> > -   if (brw->wm.base.sampler_count != 0) {
>> > -      brw_emit_reloc(&brw->batch,
>> > -                     brw->wm.base.state_offset +
>> > -                     offsetof(struct brw_wm_unit_state, wm4),
>> > -                     brw->batch.bo,
>> > -                     brw->wm.base.sampler_offset | wm->wm4.stats_enable |
>> > -                     (wm->wm4.sampler_count << 2),
>> > -                     I915_GEM_DOMAIN_INSTRUCTION, 0);
>> > -   }
>> > -
>> > -   brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
>> > -
>> > -   /* _NEW_POLGYON */
>> > -   if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
>> > -      BEGIN_BATCH(2);
>> > -      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
>> > -      OUT_BATCH_F(ctx->Polygon.OffsetClamp);
>> > -      ADVANCE_BATCH();
>> > -
>> > -      brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
>> > -   }
>> > -}
>> > -
>> > -const struct brw_tracked_state brw_wm_unit = {
>> > -   .dirty = {
>> > -      .mesa = _NEW_BUFFERS |
>> > -              _NEW_COLOR |
>> > -              _NEW_LINE |
>> > -              _NEW_POLYGON |
>> > -              _NEW_POLYGONSTIPPLE,
>> > -      .brw = BRW_NEW_BATCH |
>> > -             BRW_NEW_BLORP |
>> > -             BRW_NEW_PUSH_CONSTANT_ALLOCATION |
>> > -             BRW_NEW_FRAGMENT_PROGRAM |
>> > -             BRW_NEW_FS_PROG_DATA |
>> > -             BRW_NEW_PROGRAM_CACHE |
>> > -             BRW_NEW_SAMPLER_STATE_TABLE |
>> > -             BRW_NEW_STATS_WM,
>> > -   },
>> > -   .emit = brw_upload_wm_unit,
>> > -};
>> > diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
>> > index 4ff5394..bc64c5d 100644
>> > --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
>> > +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
>> > @@ -1713,7 +1713,33 @@ static const struct brw_tracked_state genX(sf_state) = {
>> >
>> >  /* ---------------------------------------------------------------------- */
>> >
>> > -#if GEN_GEN >= 6
>> > +static bool
>> > +brw_color_buffer_write_enabled(struct brw_context *brw)
>> > +{
>> > +   struct gl_context *ctx = &brw->ctx;
>> > +   /* BRW_NEW_FRAGMENT_PROGRAM */
>> > +   const struct gl_program *fp = brw->fragment_program;
>> > +   unsigned i;
>> > +
>> > +   /* _NEW_BUFFERS */
>> > +   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
>> > +      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
>> > +      uint64_t outputs_written = fp->info.outputs_written;
>> > +
>> > +      /* _NEW_COLOR */
>> > +      if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
>> > +                 outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
>> > +          (ctx->Color.ColorMask[i][0] ||
>> > +           ctx->Color.ColorMask[i][1] ||
>> > +           ctx->Color.ColorMask[i][2] ||
>> > +           ctx->Color.ColorMask[i][3])) {
>> > +         return true;
>> > +      }
>> > +   }
>> > +
>> > +   return false;
>> > +}
>> > +
>> >  static void
>> >  genX(upload_wm)(struct brw_context *brw)
>> >  {
>> > @@ -1725,11 +1751,10 @@ genX(upload_wm)(struct brw_context *brw)
>> >
>> >     UNUSED bool writes_depth =
>> >        wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
>> > +   UNUSED struct brw_stage_state *stage_state = &brw->wm.base;
>> > +   UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> >
>> > -#if GEN_GEN < 7
>> > -   const struct brw_stage_state *stage_state = &brw->wm.base;
>> > -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> > -
>> > +#if GEN_GEN == 6
>> >     /* We can't fold this into gen6_upload_wm_push_constants(), because
>> >      * according to the SNB PRM, vol 2 part 1 section 7.2.2
>> >      * (3DSTATE_CONSTANT_PS [DevSNB]):
>> > @@ -1748,27 +1773,94 @@ genX(upload_wm)(struct brw_context *brw)
>> >     }
>> >  #endif
>> >
>> > +#if GEN_GEN >= 6
>> >     brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
>> > -      wm.StatisticsEnable = true;
>> >        wm.LineAntialiasingRegionWidth = _10pixels;
>> >        wm.LineEndCapAntialiasingRegionWidth = _05pixels;
>> >
>> > +      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
>> > +      wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
>> > +#else
>> > +   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
>> > +   brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) {
>> > +      if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) {
>> > +         /* These two fields should be the same pre-gen6, which is why we
>> > +          * only have one hardware field to program for both dispatch
>> > +          * widths.
>> > +          */
>> > +         assert(wm_prog_data->base.dispatch_grf_start_reg ==
>> > +                wm_prog_data->dispatch_grf_start_reg_2);
>> > +      }
>> > +
>> > +      if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16)
>> > +         wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
>> > +
>> > +      if (stage_state->sampler_count)
>> > +         wm.SamplerStatePointer =
>> > +            instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
>> > +#if GEN_GEN == 5
>> > +      if (wm_prog_data->prog_offset_2)
>> > +         wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
>> > +#endif
>> > +
>> > +      wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
>> > +      wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
>> > +      /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
>> > +      wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
>> > +      wm.EarlyDepthTestEnable = true;
>> > +      wm.LineAntialiasingRegionWidth = _05pixels;
>> > +      wm.LineEndCapAntialiasingRegionWidth = _10pixels;
>> > +
>> > +      /* _NEW_POLYGON */
>> > +      if (ctx->Polygon.OffsetFill) {
>> > +         wm.GlobalDepthOffsetEnable = true;
>> > +         /* Something weird going on with legacy_global_depth_bias,
>> > +          * offset_constant, scaling and MRD.  This value passes glean
>> > +          * but gives some odd results elsewhere (eg. the
>> > +          * quad-offset-units test).
>> > +          */
>> > +         wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
>> > +
>> > +         /* This is the only value that passes glean:
>> > +         */
>> > +         wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
>> > +      }
>> > +
>> > +      wm.DepthCoefficientURBReadOffset = 1;
>> > +#endif
>> > +
>> > +      /* BRW_NEW_STATS_WM */
>> > +      wm.StatisticsEnable = GEN_GEN >= 6 || brw->stats_wm;
>> > +
>> >  #if GEN_GEN < 7
>> >        if (wm_prog_data->base.use_alt_mode)
>> > -         wm.FloatingPointMode = Alternate;
>> > +         wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
>> > +
>> > +      wm.SamplerCount = GEN_GEN == 5 ?
>> > +         0 : DIV_ROUND_UP(stage_state->sampler_count, 4);
>> >
>> > -      wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4);
>> > -      wm.BindingTableEntryCount = wm_prog_data->base.binding_table.size_bytes / 4;
>> > +      wm.BindingTableEntryCount =
>> > +         wm_prog_data->base.binding_table.size_bytes / 4;
>> >        wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
>> >        wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
>> >        wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
>> >        wm.DispatchGRFStartRegisterForConstantSetupData0 =
>> >           wm_prog_data->base.dispatch_grf_start_reg;
>> > -      wm.DispatchGRFStartRegisterForConstantSetupData2 =
>> > -         wm_prog_data->dispatch_grf_start_reg_2;
>> > -      wm.KernelStartPointer0 = stage_state->prog_offset;
>> > -      wm.KernelStartPointer2 = stage_state->prog_offset +
>> > -         wm_prog_data->prog_offset_2;
>> > +      if (GEN_GEN == 6 ||
>> > +          wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) {
>> > +         wm.KernelStartPointer0 = KSP_ro(brw,
>> > +                                         stage_state->prog_offset);
>> > +      }
>> > +
>> > +#if GEN_GEN >= 5
>> > +      if (GEN_GEN == 6 || wm_prog_data->prog_offset_2) {
>> > +         wm.KernelStartPointer2 =
>> > +            KSP_ro(brw, stage_state->prog_offset +
>> > +                   wm_prog_data->prog_offset_2);
>> > +      }
>> > +#endif
>> > +
>> > +#if GEN_GEN == 6
>> >        wm.DualSourceBlendEnable =
>> >           wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
>> >           ctx->Color.Blend[0]._UsesDualSrc;
>> > @@ -1792,42 +1884,34 @@ genX(upload_wm)(struct brw_context *brw)
>> >        else
>> >           wm.PositionXYOffsetSelect = POSOFFSET_NONE;
>> >
>> > +      wm.DispatchGRFStartRegisterForConstantSetupData2 =
>> > +         wm_prog_data->dispatch_grf_start_reg_2;
>> > +#endif
>> > +
>> >        if (wm_prog_data->base.total_scratch) {
>> >           wm.ScratchSpaceBasePointer =
>> > -            render_bo(stage_state->scratch_bo,
>> > -                      ffs(stage_state->per_thread_scratch) - 11);
>> > +            render_bo(stage_state->scratch_bo, 0);
>> > +         wm.PerThreadScratchSpace =
>> > +            ffs(stage_state->per_thread_scratch) - 11;
>> >        }
>> >
>> >        wm.PixelShaderComputedDepth = writes_depth;
>> >  #endif
>> >
>> > -      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
>> > -
>> >        /* _NEW_LINE */
>> >        wm.LineStippleEnable = ctx->Line.StippleFlag;
>> >
>> >        /* _NEW_POLYGON */
>> >        wm.PolygonStippleEnable = ctx->Polygon.StippleFlag;
>> > -      wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
>> >
>> >  #if GEN_GEN < 8
>> > -      /* _NEW_BUFFERS */
>> > -      const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
>> >
>> > -      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
>> > +#if GEN_GEN >= 6
>> >        wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
>> > -      if (wm_prog_data->uses_kill ||
>> > -          _mesa_is_alpha_test_enabled(ctx) ||
>> > -          _mesa_is_alpha_to_coverage_enabled(ctx) ||
>> > -          wm_prog_data->uses_omask) {
>> > -         wm.PixelShaderKillsPixel = true;
>> > -      }
>> >
>> > -      /* _NEW_BUFFERS | _NEW_COLOR */
>> > -      if (brw_color_buffer_write_enabled(brw) || writes_depth ||
>> > -          wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) {
>> > -         wm.ThreadDispatchEnable = true;
>> > -      }
>> > +      /* _NEW_BUFFERS */
>> > +      const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
>> > +
>> >        if (multisampled_fbo) {
>> >           /* _NEW_MULTISAMPLE */
>> >           if (ctx->Multisample.Enabled)
>> > @@ -1843,6 +1927,21 @@ genX(upload_wm)(struct brw_context *brw)
>> >           wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
>> >           wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
>> >        }
>> > +#endif
>> > +      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
>> > +      if (wm_prog_data->uses_kill ||
>> > +          _mesa_is_alpha_test_enabled(ctx) ||
>> > +          _mesa_is_alpha_to_coverage_enabled(ctx) ||
>> > +          (GEN_GEN >= 6 && wm_prog_data->uses_omask)) {
>> > +         wm.PixelShaderKillsPixel = true;
>> > +      }
>> > +
>> > +      /* _NEW_BUFFERS | _NEW_COLOR */
>> > +      if (brw_color_buffer_write_enabled(brw) || writes_depth ||
>> > +          wm.PixelShaderKillsPixel ||
>> > +          (GEN_GEN >= 6 && wm_prog_data->has_side_effects)) {
>> > +         wm.ThreadDispatchEnable = true;
>> > +      }
>> >
>> >  #if GEN_GEN >= 7
>> >        wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
>> > @@ -1873,6 +1972,16 @@ genX(upload_wm)(struct brw_context *brw)
>> >           wm.EarlyDepthStencilControl = EDSC_PSEXEC;
>> >  #endif
>> >     }
>> > +
>> > +#if GEN_GEN <= 5
>> > +   if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
>> > +      brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {
>> > +         clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
>> > +      }
>> > +
>> > +      brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
>> > +   }
>> > +#endif
>> >  }
>> >
>> >  static const struct brw_tracked_state genX(wm_state) = {
>> > @@ -1880,17 +1989,23 @@ static const struct brw_tracked_state genX(wm_state) = {
>> >        .mesa  = _NEW_LINE |
>> >                 _NEW_POLYGON |
>> >                 (GEN_GEN < 8 ? _NEW_BUFFERS |
>> > -                              _NEW_COLOR |
>> > -                              _NEW_MULTISAMPLE :
>> > +                              _NEW_COLOR :
>> >                                0) |
>> > -               (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
>> > +               (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0) |
>> > +               (GEN_GEN < 6 ? _NEW_POLYGONSTIPPLE : 0) |
>> > +               (GEN_GEN < 8 && GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0),
>> >        .brw   = BRW_NEW_BLORP |
>> >                 BRW_NEW_FS_PROG_DATA |
>> > +               (GEN_GEN < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
>> > +                              BRW_NEW_FRAGMENT_PROGRAM |
>> > +                              BRW_NEW_PROGRAM_CACHE |
>> > +                              BRW_NEW_SAMPLER_STATE_TABLE |
>> > +                              BRW_NEW_STATS_WM
>> > +                            : 0) |
>> >                 (GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT),
>> >     },
>> >     .emit = genX(upload_wm),
>> >  };
>> > -#endif
>> >
>> >  /* ---------------------------------------------------------------------- */
>> >
>> > @@ -4475,7 +4590,7 @@ genX(init_atoms)(struct brw_context *brw)
>> >        &brw_vs_samplers,
>> >
>> >        /* These set up state for brw_psp_urb_cbs */
>> > -      &brw_wm_unit,
>> > +      &genX(wm_state),
>> >        &genX(sf_clip_viewport),
>> >        &genX(sf_state),
>> >        &genX(vs_state), /* always required, enabled or not */
>> > --
>> > 2.9.4
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev at lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list