[Mesa-dev] [PATCH 18/18] i965: Convert WM_STATE to genxml on gen4-5.
Kristian Høgsberg
hoegsberg at gmail.com
Mon Jun 19 16:46:30 UTC 2017
On Fri, Jun 16, 2017 at 4:31 PM, Rafael Antognolli
<rafael.antognolli at intel.com> wrote:
> The code doesn't exactly get a lot simpler, but at least it is in a single
> place, and we delete more than we add.
Another good point is that you get rid of struct brw_wm_unit_state,
which was a third mechanism for encoding GEN state. We used to have
GENXML, manual packing, and these bitfield structs. Now we're down to
just GENXML and some manual packing.
Kristian
>
> Signed-off-by: Rafael Antognolli <rafael.antognolli at intel.com>
> ---
> src/mesa/drivers/dri/i965/Makefile.sources | 1 -
> src/mesa/drivers/dri/i965/brw_state.h | 1 -
> src/mesa/drivers/dri/i965/brw_structs.h | 121 ------------
> src/mesa/drivers/dri/i965/brw_wm.h | 2 -
> src/mesa/drivers/dri/i965/brw_wm_state.c | 274 --------------------------
> src/mesa/drivers/dri/i965/genX_state_upload.c | 191 ++++++++++++++----
> 6 files changed, 153 insertions(+), 437 deletions(-)
> delete mode 100644 src/mesa/drivers/dri/i965/brw_wm_state.c
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index 89be92e..c15b3ef 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -61,7 +61,6 @@ i965_FILES = \
> brw_vs_surface_state.c \
> brw_wm.c \
> brw_wm.h \
> - brw_wm_state.c \
> brw_wm_surface_state.c \
> gen4_blorp_exec.h \
> gen6_clip_state.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index 8f3bd7f..9588a51 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -89,7 +89,6 @@ extern const struct brw_tracked_state brw_wm_image_surfaces;
> extern const struct brw_tracked_state brw_cs_ubo_surfaces;
> extern const struct brw_tracked_state brw_cs_abo_surfaces;
> extern const struct brw_tracked_state brw_cs_image_surfaces;
> -extern const struct brw_tracked_state brw_wm_unit;
>
> extern const struct brw_tracked_state brw_psp_urb_cbs;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
> index 5a0d91d..fb592be 100644
> --- a/src/mesa/drivers/dri/i965/brw_structs.h
> +++ b/src/mesa/drivers/dri/i965/brw_structs.h
> @@ -65,127 +65,6 @@ struct brw_urb_fence
> } bits1;
> };
>
> -/* State structs for the various fixed function units:
> - */
> -
> -
> -struct thread0
> -{
> - unsigned pad0:1;
> - unsigned grf_reg_count:3;
> - unsigned pad1:2;
> - unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
> -};
> -
> -struct thread1
> -{
> - unsigned ext_halt_exception_enable:1;
> - unsigned sw_exception_enable:1;
> - unsigned mask_stack_exception_enable:1;
> - unsigned timeout_exception_enable:1;
> - unsigned illegal_op_exception_enable:1;
> - unsigned pad0:3;
> - unsigned depth_coef_urb_read_offset:6; /* WM only */
> - unsigned pad1:2;
> - unsigned floating_point_mode:1;
> - unsigned thread_priority:1;
> - unsigned binding_table_entry_count:8;
> - unsigned pad3:5;
> - unsigned single_program_flow:1;
> -};
> -
> -struct thread2
> -{
> - unsigned per_thread_scratch_space:4;
> - unsigned pad0:6;
> - unsigned scratch_space_base_pointer:22;
> -};
> -
> -
> -struct thread3
> -{
> - unsigned dispatch_grf_start_reg:4;
> - unsigned urb_entry_read_offset:6;
> - unsigned pad0:1;
> - unsigned urb_entry_read_length:6;
> - unsigned pad1:1;
> - unsigned const_urb_entry_read_offset:6;
> - unsigned pad2:1;
> - unsigned const_urb_entry_read_length:6;
> - unsigned pad3:1;
> -};
> -
> -struct brw_wm_unit_state
> -{
> - struct thread0 thread0;
> - struct thread1 thread1;
> - struct thread2 thread2;
> - struct thread3 thread3;
> -
> - struct {
> - unsigned stats_enable:1;
> - unsigned depth_buffer_clear:1;
> - unsigned sampler_count:3;
> - unsigned sampler_state_pointer:27;
> - } wm4;
> -
> - struct
> - {
> - unsigned enable_8_pix:1;
> - unsigned enable_16_pix:1;
> - unsigned enable_32_pix:1;
> - unsigned enable_con_32_pix:1;
> - unsigned enable_con_64_pix:1;
> - unsigned pad0:1;
> -
> - /* These next four bits are for Ironlake+ */
> - unsigned fast_span_coverage_enable:1;
> - unsigned depth_buffer_clear:1;
> - unsigned depth_buffer_resolve_enable:1;
> - unsigned hierarchical_depth_buffer_resolve_enable:1;
> -
> - unsigned legacy_global_depth_bias:1;
> - unsigned line_stipple:1;
> - unsigned depth_offset:1;
> - unsigned polygon_stipple:1;
> - unsigned line_aa_region_width:2;
> - unsigned line_endcap_aa_region_width:2;
> - unsigned early_depth_test:1;
> - unsigned thread_dispatch_enable:1;
> - unsigned program_uses_depth:1;
> - unsigned program_computes_depth:1;
> - unsigned program_uses_killpixel:1;
> - unsigned legacy_line_rast: 1;
> - unsigned transposed_urb_read_enable:1;
> - unsigned max_threads:7;
> - } wm5;
> -
> - float global_depth_offset_constant;
> - float global_depth_offset_scale;
> -
> - /* for Ironlake only */
> - struct {
> - unsigned pad0:1;
> - unsigned grf_reg_count_1:3;
> - unsigned pad1:2;
> - unsigned kernel_start_pointer_1:26;
> - } wm8;
> -
> - struct {
> - unsigned pad0:1;
> - unsigned grf_reg_count_2:3;
> - unsigned pad1:2;
> - unsigned kernel_start_pointer_2:26;
> - } wm9;
> -
> - struct {
> - unsigned pad0:1;
> - unsigned grf_reg_count_3:3;
> - unsigned pad1:2;
> - unsigned kernel_start_pointer_3:26;
> - } wm10;
> -};
> -
> struct gen5_sampler_default_color {
> uint8_t ub[4];
> float f[4];
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
> index 613172a..113cdf3 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.h
> +++ b/src/mesa/drivers/dri/i965/brw_wm.h
> @@ -41,8 +41,6 @@
> extern "C" {
> #endif
>
> -bool brw_color_buffer_write_enabled(struct brw_context *brw);
> -
> void
> brw_upload_wm_prog(struct brw_context *brw);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
> deleted file mode 100644
> index 69bbeb2..0000000
> --- a/src/mesa/drivers/dri/i965/brw_wm_state.c
> +++ /dev/null
> @@ -1,274 +0,0 @@
> -/*
> - Copyright (C) Intel Corp. 2006. All Rights Reserved.
> - Intel funded Tungsten Graphics to
> - develop this 3D driver.
> -
> - Permission is hereby granted, free of charge, to any person obtaining
> - a copy of this software and associated documentation files (the
> - "Software"), to deal in the Software without restriction, including
> - without limitation the rights to use, copy, modify, merge, publish,
> - distribute, sublicense, and/or sell copies of the Software, and to
> - permit persons to whom the Software is furnished to do so, subject to
> - the following conditions:
> -
> - The above copyright notice and this permission notice (including the
> - next paragraph) shall be included in all copies or substantial
> - portions of the Software.
> -
> - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
> - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
> - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
> - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> -
> - **********************************************************************/
> - /*
> - * Authors:
> - * Keith Whitwell <keithw at vmware.com>
> - */
> -
> -
> -
> -#include "intel_batchbuffer.h"
> -#include "intel_fbo.h"
> -#include "brw_context.h"
> -#include "brw_state.h"
> -#include "brw_defines.h"
> -#include "brw_wm.h"
> -#include "compiler/nir/nir.h"
> -
> -/***********************************************************************
> - * WM unit - fragment programs and rasterization
> - */
> -
> -bool
> -brw_color_buffer_write_enabled(struct brw_context *brw)
> -{
> - struct gl_context *ctx = &brw->ctx;
> - /* BRW_NEW_FRAGMENT_PROGRAM */
> - const struct gl_program *fp = brw->fragment_program;
> - unsigned i;
> -
> - /* _NEW_BUFFERS */
> - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
> - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
> - uint64_t outputs_written = fp->info.outputs_written;
> -
> - /* _NEW_COLOR */
> - if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
> - outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
> - (ctx->Color.ColorMask[i][0] ||
> - ctx->Color.ColorMask[i][1] ||
> - ctx->Color.ColorMask[i][2] ||
> - ctx->Color.ColorMask[i][3])) {
> - return true;
> - }
> - }
> -
> - return false;
> -}
> -
> -/**
> - * Setup wm hardware state. See page 225 of Volume 2
> - */
> -static void
> -brw_upload_wm_unit(struct brw_context *brw)
> -{
> - const struct gen_device_info *devinfo = &brw->screen->devinfo;
> - struct gl_context *ctx = &brw->ctx;
> - /* BRW_NEW_FRAGMENT_PROGRAM */
> - const struct gl_program *fp = brw->fragment_program;
> - /* BRW_NEW_FS_PROG_DATA */
> - const struct brw_wm_prog_data *prog_data =
> - brw_wm_prog_data(brw->wm.base.prog_data);
> - struct brw_wm_unit_state *wm;
> -
> - wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.base.state_offset);
> - memset(wm, 0, sizeof(*wm));
> -
> - if (prog_data->dispatch_8 && prog_data->dispatch_16) {
> - /* These two fields should be the same pre-gen6, which is why we
> - * only have one hardware field to program for both dispatch
> - * widths.
> - */
> - assert(prog_data->base.dispatch_grf_start_reg ==
> - prog_data->dispatch_grf_start_reg_2);
> - }
> -
> - /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */
> - wm->wm5.enable_8_pix = prog_data->dispatch_8;
> - wm->wm5.enable_16_pix = prog_data->dispatch_16;
> -
> - if (prog_data->dispatch_8 || prog_data->dispatch_16) {
> - wm->thread0.grf_reg_count = prog_data->reg_blocks_0;
> - wm->thread0.kernel_start_pointer =
> - brw_program_reloc(brw,
> - brw->wm.base.state_offset +
> - offsetof(struct brw_wm_unit_state, thread0),
> - brw->wm.base.prog_offset +
> - (wm->thread0.grf_reg_count << 1)) >> 6;
> - }
> -
> - if (prog_data->prog_offset_2) {
> - wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_2;
> - wm->wm9.kernel_start_pointer_2 =
> - brw_program_reloc(brw,
> - brw->wm.base.state_offset +
> - offsetof(struct brw_wm_unit_state, wm9),
> - brw->wm.base.prog_offset +
> - prog_data->prog_offset_2 +
> - (wm->wm9.grf_reg_count_2 << 1)) >> 6;
> - }
> -
> - wm->thread1.depth_coef_urb_read_offset = 1;
> - if (prog_data->base.use_alt_mode)
> - wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
> - else
> - wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
> -
> - wm->thread1.binding_table_entry_count =
> - prog_data->base.binding_table.size_bytes / 4;
> -
> - if (prog_data->base.total_scratch != 0) {
> - wm->thread2.scratch_space_base_pointer =
> - brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */
> - wm->thread2.per_thread_scratch_space =
> - ffs(brw->wm.base.per_thread_scratch) - 11;
> - } else {
> - wm->thread2.scratch_space_base_pointer = 0;
> - wm->thread2.per_thread_scratch_space = 0;
> - }
> -
> - wm->thread3.dispatch_grf_start_reg =
> - prog_data->base.dispatch_grf_start_reg;
> - wm->thread3.urb_entry_read_length =
> - prog_data->num_varying_inputs * 2;
> - wm->thread3.urb_entry_read_offset = 0;
> - wm->thread3.const_urb_entry_read_length =
> - prog_data->base.curb_read_length;
> - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
> - wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
> -
> - if (brw->gen == 5)
> - wm->wm4.sampler_count = 0; /* hardware requirement */
> - else {
> - wm->wm4.sampler_count = (brw->wm.base.sampler_count + 1) / 4;
> - }
> -
> - if (brw->wm.base.sampler_count) {
> - /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
> - wm->wm4.sampler_state_pointer = (brw->batch.bo->offset64 +
> - brw->wm.base.sampler_offset) >> 5;
> - } else {
> - wm->wm4.sampler_state_pointer = 0;
> - }
> -
> - /* BRW_NEW_FRAGMENT_PROGRAM */
> - wm->wm5.program_uses_depth = prog_data->uses_src_depth;
> - wm->wm5.program_computes_depth = (fp->info.outputs_written &
> - BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
> - /* _NEW_BUFFERS
> - * Override for NULL depthbuffer case, required by the Pixel Shader Computed
> - * Depth field.
> - */
> - if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH))
> - wm->wm5.program_computes_depth = 0;
> -
> - /* _NEW_COLOR */
> - wm->wm5.program_uses_killpixel =
> - prog_data->uses_kill || ctx->Color.AlphaEnabled;
> -
> - wm->wm5.max_threads = devinfo->max_wm_threads - 1;
> -
> - /* _NEW_BUFFERS | _NEW_COLOR */
> - if (brw_color_buffer_write_enabled(brw) ||
> - wm->wm5.program_uses_killpixel ||
> - wm->wm5.program_computes_depth) {
> - wm->wm5.thread_dispatch_enable = 1;
> - }
> -
> - wm->wm5.legacy_line_rast = 0;
> - wm->wm5.legacy_global_depth_bias = 0;
> - wm->wm5.early_depth_test = 1; /* never need to disable */
> - wm->wm5.line_aa_region_width = 0;
> - wm->wm5.line_endcap_aa_region_width = 1;
> -
> - /* _NEW_POLYGONSTIPPLE */
> - wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;
> -
> - /* _NEW_POLYGON */
> - if (ctx->Polygon.OffsetFill) {
> - wm->wm5.depth_offset = 1;
> - /* Something weird going on with legacy_global_depth_bias,
> - * offset_constant, scaling and MRD. This value passes glean
> - * but gives some odd results elsewere (eg. the
> - * quad-offset-units test).
> - */
> - wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2;
> -
> - /* This is the only value that passes glean:
> - */
> - wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor;
> - }
> -
> - /* _NEW_LINE */
> - wm->wm5.line_stipple = ctx->Line.StippleFlag;
> -
> - /* BRW_NEW_STATS_WM */
> - if (brw->stats_wm)
> - wm->wm4.stats_enable = 1;
> -
> - /* Emit scratch space relocation */
> - if (prog_data->base.total_scratch != 0) {
> - brw_emit_reloc(&brw->batch,
> - brw->wm.base.state_offset +
> - offsetof(struct brw_wm_unit_state, thread2),
> - brw->wm.base.scratch_bo,
> - wm->thread2.per_thread_scratch_space,
> - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
> - }
> -
> - /* Emit sampler state relocation */
> - if (brw->wm.base.sampler_count != 0) {
> - brw_emit_reloc(&brw->batch,
> - brw->wm.base.state_offset +
> - offsetof(struct brw_wm_unit_state, wm4),
> - brw->batch.bo,
> - brw->wm.base.sampler_offset | wm->wm4.stats_enable |
> - (wm->wm4.sampler_count << 2),
> - I915_GEM_DOMAIN_INSTRUCTION, 0);
> - }
> -
> - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
> -
> - /* _NEW_POLGYON */
> - if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
> - BEGIN_BATCH(2);
> - OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
> - OUT_BATCH_F(ctx->Polygon.OffsetClamp);
> - ADVANCE_BATCH();
> -
> - brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
> - }
> -}
> -
> -const struct brw_tracked_state brw_wm_unit = {
> - .dirty = {
> - .mesa = _NEW_BUFFERS |
> - _NEW_COLOR |
> - _NEW_LINE |
> - _NEW_POLYGON |
> - _NEW_POLYGONSTIPPLE,
> - .brw = BRW_NEW_BATCH |
> - BRW_NEW_BLORP |
> - BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> - BRW_NEW_FRAGMENT_PROGRAM |
> - BRW_NEW_FS_PROG_DATA |
> - BRW_NEW_PROGRAM_CACHE |
> - BRW_NEW_SAMPLER_STATE_TABLE |
> - BRW_NEW_STATS_WM,
> - },
> - .emit = brw_upload_wm_unit,
> -};
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 4ff5394..bc64c5d 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -1713,7 +1713,33 @@ static const struct brw_tracked_state genX(sf_state) = {
>
> /* ---------------------------------------------------------------------- */
>
> -#if GEN_GEN >= 6
> +static bool
> +brw_color_buffer_write_enabled(struct brw_context *brw)
> +{
> + struct gl_context *ctx = &brw->ctx;
> + /* BRW_NEW_FRAGMENT_PROGRAM */
> + const struct gl_program *fp = brw->fragment_program;
> + unsigned i;
> +
> + /* _NEW_BUFFERS */
> + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
> + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
> + uint64_t outputs_written = fp->info.outputs_written;
> +
> + /* _NEW_COLOR */
> + if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
> + outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
> + (ctx->Color.ColorMask[i][0] ||
> + ctx->Color.ColorMask[i][1] ||
> + ctx->Color.ColorMask[i][2] ||
> + ctx->Color.ColorMask[i][3])) {
> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> static void
> genX(upload_wm)(struct brw_context *brw)
> {
> @@ -1725,11 +1751,10 @@ genX(upload_wm)(struct brw_context *brw)
>
> UNUSED bool writes_depth =
> wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
> + UNUSED struct brw_stage_state *stage_state = &brw->wm.base;
> + UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
>
> -#if GEN_GEN < 7
> - const struct brw_stage_state *stage_state = &brw->wm.base;
> - const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -
> +#if GEN_GEN == 6
> /* We can't fold this into gen6_upload_wm_push_constants(), because
> * according to the SNB PRM, vol 2 part 1 section 7.2.2
> * (3DSTATE_CONSTANT_PS [DevSNB]):
> @@ -1748,27 +1773,94 @@ genX(upload_wm)(struct brw_context *brw)
> }
> #endif
>
> +#if GEN_GEN >= 6
> brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
> - wm.StatisticsEnable = true;
> wm.LineAntialiasingRegionWidth = _10pixels;
> wm.LineEndCapAntialiasingRegionWidth = _05pixels;
>
> + wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
> + wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
> +#else
> + ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
> + brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) {
> + if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) {
> + /* These two fields should be the same pre-gen6, which is why we
> + * only have one hardware field to program for both dispatch
> + * widths.
> + */
> + assert(wm_prog_data->base.dispatch_grf_start_reg ==
> + wm_prog_data->dispatch_grf_start_reg_2);
> + }
> +
> + if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16)
> + wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
> +
> + if (stage_state->sampler_count)
> + wm.SamplerStatePointer =
> + instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
> +#if GEN_GEN == 5
> + if (wm_prog_data->prog_offset_2)
> + wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
> +#endif
> +
> + wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
> + wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
> + /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
> + wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
> + wm.EarlyDepthTestEnable = true;
> + wm.LineAntialiasingRegionWidth = _05pixels;
> + wm.LineEndCapAntialiasingRegionWidth = _10pixels;
> +
> + /* _NEW_POLYGON */
> + if (ctx->Polygon.OffsetFill) {
> + wm.GlobalDepthOffsetEnable = true;
> + /* Something weird going on with legacy_global_depth_bias,
> + * offset_constant, scaling and MRD. This value passes glean
> + * but gives some odd results elsewhere (eg. the
> + * quad-offset-units test).
> + */
> + wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
> +
> + /* This is the only value that passes glean:
> + */
> + wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
> + }
> +
> + wm.DepthCoefficientURBReadOffset = 1;
> +#endif
> +
> + /* BRW_NEW_STATS_WM */
> + wm.StatisticsEnable = GEN_GEN >= 6 || brw->stats_wm;
> +
> #if GEN_GEN < 7
> if (wm_prog_data->base.use_alt_mode)
> - wm.FloatingPointMode = Alternate;
> + wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
> +
> + wm.SamplerCount = GEN_GEN == 5 ?
> + 0 : DIV_ROUND_UP(stage_state->sampler_count, 4);
>
> - wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4);
> - wm.BindingTableEntryCount = wm_prog_data->base.binding_table.size_bytes / 4;
> + wm.BindingTableEntryCount =
> + wm_prog_data->base.binding_table.size_bytes / 4;
> wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
> wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
> wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
> wm.DispatchGRFStartRegisterForConstantSetupData0 =
> wm_prog_data->base.dispatch_grf_start_reg;
> - wm.DispatchGRFStartRegisterForConstantSetupData2 =
> - wm_prog_data->dispatch_grf_start_reg_2;
> - wm.KernelStartPointer0 = stage_state->prog_offset;
> - wm.KernelStartPointer2 = stage_state->prog_offset +
> - wm_prog_data->prog_offset_2;
> + if (GEN_GEN == 6 ||
> + wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) {
> + wm.KernelStartPointer0 = KSP_ro(brw,
> + stage_state->prog_offset);
> + }
> +
> +#if GEN_GEN >= 5
> + if (GEN_GEN == 6 || wm_prog_data->prog_offset_2) {
> + wm.KernelStartPointer2 =
> + KSP_ro(brw, stage_state->prog_offset +
> + wm_prog_data->prog_offset_2);
> + }
> +#endif
> +
> +#if GEN_GEN == 6
> wm.DualSourceBlendEnable =
> wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
> ctx->Color.Blend[0]._UsesDualSrc;
> @@ -1792,42 +1884,34 @@ genX(upload_wm)(struct brw_context *brw)
> else
> wm.PositionXYOffsetSelect = POSOFFSET_NONE;
>
> + wm.DispatchGRFStartRegisterForConstantSetupData2 =
> + wm_prog_data->dispatch_grf_start_reg_2;
> +#endif
> +
> if (wm_prog_data->base.total_scratch) {
> wm.ScratchSpaceBasePointer =
> - render_bo(stage_state->scratch_bo,
> - ffs(stage_state->per_thread_scratch) - 11);
> + render_bo(stage_state->scratch_bo, 0);
> + wm.PerThreadScratchSpace =
> + ffs(stage_state->per_thread_scratch) - 11;
> }
>
> wm.PixelShaderComputedDepth = writes_depth;
> #endif
>
> - wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
> -
> /* _NEW_LINE */
> wm.LineStippleEnable = ctx->Line.StippleFlag;
>
> /* _NEW_POLYGON */
> wm.PolygonStippleEnable = ctx->Polygon.StippleFlag;
> - wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
>
> #if GEN_GEN < 8
> - /* _NEW_BUFFERS */
> - const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
>
> - wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
> +#if GEN_GEN >= 6
> wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
> - if (wm_prog_data->uses_kill ||
> - _mesa_is_alpha_test_enabled(ctx) ||
> - _mesa_is_alpha_to_coverage_enabled(ctx) ||
> - wm_prog_data->uses_omask) {
> - wm.PixelShaderKillsPixel = true;
> - }
>
> - /* _NEW_BUFFERS | _NEW_COLOR */
> - if (brw_color_buffer_write_enabled(brw) || writes_depth ||
> - wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) {
> - wm.ThreadDispatchEnable = true;
> - }
> + /* _NEW_BUFFERS */
> + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
> +
> if (multisampled_fbo) {
> /* _NEW_MULTISAMPLE */
> if (ctx->Multisample.Enabled)
> @@ -1843,6 +1927,21 @@ genX(upload_wm)(struct brw_context *brw)
> wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
> wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
> }
> +#endif
> + wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
> + if (wm_prog_data->uses_kill ||
> + _mesa_is_alpha_test_enabled(ctx) ||
> + _mesa_is_alpha_to_coverage_enabled(ctx) ||
> + (GEN_GEN >= 6 && wm_prog_data->uses_omask)) {
> + wm.PixelShaderKillsPixel = true;
> + }
> +
> + /* _NEW_BUFFERS | _NEW_COLOR */
> + if (brw_color_buffer_write_enabled(brw) || writes_depth ||
> + wm.PixelShaderKillsPixel ||
> + (GEN_GEN >= 6 && wm_prog_data->has_side_effects)) {
> + wm.ThreadDispatchEnable = true;
> + }
>
> #if GEN_GEN >= 7
> wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
> @@ -1873,6 +1972,16 @@ genX(upload_wm)(struct brw_context *brw)
> wm.EarlyDepthStencilControl = EDSC_PSEXEC;
> #endif
> }
> +
> +#if GEN_GEN <= 5
> + if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
> + brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {
> + clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
> + }
> +
> + brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
> + }
> +#endif
> }
>
> static const struct brw_tracked_state genX(wm_state) = {
> @@ -1880,17 +1989,23 @@ static const struct brw_tracked_state genX(wm_state) = {
> .mesa = _NEW_LINE |
> _NEW_POLYGON |
> (GEN_GEN < 8 ? _NEW_BUFFERS |
> - _NEW_COLOR |
> - _NEW_MULTISAMPLE :
> + _NEW_COLOR :
> 0) |
> - (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
> + (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0) |
> + (GEN_GEN < 6 ? _NEW_POLYGONSTIPPLE : 0) |
> + (GEN_GEN < 8 && GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0),
> .brw = BRW_NEW_BLORP |
> BRW_NEW_FS_PROG_DATA |
> + (GEN_GEN < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> + BRW_NEW_FRAGMENT_PROGRAM |
> + BRW_NEW_PROGRAM_CACHE |
> + BRW_NEW_SAMPLER_STATE_TABLE |
> + BRW_NEW_STATS_WM
> + : 0) |
> (GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT),
> },
> .emit = genX(upload_wm),
> };
> -#endif
>
> /* ---------------------------------------------------------------------- */
>
> @@ -4475,7 +4590,7 @@ genX(init_atoms)(struct brw_context *brw)
> &brw_vs_samplers,
>
> /* These set up state for brw_psp_urb_cbs */
> - &brw_wm_unit,
> + &genX(wm_state),
> &genX(sf_clip_viewport),
> &genX(sf_state),
> &genX(vs_state), /* always required, enabled or not */
> --
> 2.9.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list