[Mesa-dev] [PATCH 01/12] i965: Assign PS kernel start pointers when we decide which kernels to use

Ian Romanick idr at freedesktop.org
Wed Aug 13 18:16:23 PDT 2014


On 08/11/2014 05:29 PM, Kristian Høgsberg wrote:
> Right now we decide which kernels to use and the GRF start offsets in
> one place and emit the kernel pointers later.  The logic of how to map
> 8, 16 and 32 kernels to kernel start pointers follows the same logic as which
> GRF start offsets to use, so lets figure out these two things in one place.
> 
> Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
> ---
>  src/mesa/drivers/dri/i965/gen6_wm_state.c | 16 ++++++++--------
>  src/mesa/drivers/dri/i965/gen7_wm_state.c | 18 ++++++++++--------
>  src/mesa/drivers/dri/i965/gen8_ps_state.c | 14 ++++++++------
>  3 files changed, 26 insertions(+), 22 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
> index 047e036..ebd8443 100644
> --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
> @@ -71,7 +71,7 @@ upload_wm_state(struct brw_context *brw)
>     struct gl_context *ctx = &brw->ctx;
>     const struct brw_fragment_program *fp =
>        brw_fragment_program_const(brw->fragment_program);
> -   uint32_t dw2, dw4, dw5, dw6;
> +   uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
>  
>     /* _NEW_BUFFERS */
>     bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
> @@ -109,7 +109,7 @@ upload_wm_state(struct brw_context *brw)
>        ADVANCE_BATCH();
>     }
>  
> -   dw2 = dw4 = dw5 = dw6 = 0;
> +   dw2 = dw4 = dw5 = dw6 = ksp2 = 0;
>     dw4 |= GEN6_WM_STATISTICS_ENABLE;
>     dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
>     dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
> @@ -151,14 +151,18 @@ upload_wm_state(struct brw_context *brw)
>                   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
>           dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
>                   GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
> +         ksp0 = brw->wm.base.prog_offset;
> +         ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
>        } else
>           dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
>                  GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
> +         ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;

This looks pretty broken.  More { and } are needed.

>     }
>     else {
>        dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
>        dw4 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
>                GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
> +      ksp0 = brw->wm.base.prog_offset;
>     }
>  
>     /* CACHE_NEW_WM_PROG | _NEW_COLOR */
> @@ -277,10 +281,7 @@ upload_wm_state(struct brw_context *brw)
>  
>     BEGIN_BATCH(9);
>     OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
> -   if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1)
> -      OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
> -   else
> -      OUT_BATCH(brw->wm.base.prog_offset);
> +   OUT_BATCH(ksp0);
>     OUT_BATCH(dw2);
>     if (brw->wm.prog_data->total_scratch) {
>        OUT_RELOC(brw->wm.base.scratch_bo,
> @@ -293,8 +294,7 @@ upload_wm_state(struct brw_context *brw)
>     OUT_BATCH(dw5);
>     OUT_BATCH(dw6);
>     OUT_BATCH(0); /* kernel 1 pointer */
> -   /* kernel 2 pointer */
> -   OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
> +   OUT_BATCH(ksp2);
>     ADVANCE_BATCH();
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> index 27a3f44..c03d19d 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> @@ -139,11 +139,11 @@ static void
>  upload_ps_state(struct brw_context *brw)
>  {
>     struct gl_context *ctx = &brw->ctx;
> -   uint32_t dw2, dw4, dw5;
> +   uint32_t dw2, dw4, dw5, ksp0, ksp2;
>     const int max_threads_shift = brw->is_haswell ?
>        HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
>  
> -   dw2 = dw4 = dw5 = 0;
> +   dw2 = dw4 = dw5 = ksp2 = 0;
>  
>     dw2 |=
>        (ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
> @@ -231,22 +231,24 @@ upload_ps_state(struct brw_context *brw)
>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
>           dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
> -      } else
> +         ksp0 = brw->wm.base.prog_offset;
> +         ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
> +      } else {
>           dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
> +         ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
> +      }
>     }
>     else {
>        dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
>        dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
>                GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
> +      ksp0 = brw->wm.base.prog_offset;
>     }
>  
>     BEGIN_BATCH(8);
>     OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
> -   if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1)
> -      OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
> -   else
> -      OUT_BATCH(brw->wm.base.prog_offset);
> +   OUT_BATCH(ksp0);
>     OUT_BATCH(dw2);
>     if (brw->wm.prog_data->total_scratch) {
>        OUT_RELOC(brw->wm.base.scratch_bo,
> @@ -258,7 +260,7 @@ upload_ps_state(struct brw_context *brw)
>     OUT_BATCH(dw4);
>     OUT_BATCH(dw5);
>     OUT_BATCH(0); /* kernel 1 pointer */
> -   OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
> +   OUT_BATCH(ksp2);
>     ADVANCE_BATCH();
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> index 3d6d7f0..f58d49c 100644
> --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> @@ -134,7 +134,7 @@ static void
>  upload_ps_state(struct brw_context *brw)
>  {
>     struct gl_context *ctx = &brw->ctx;
> -   uint32_t dw3 = 0, dw6 = 0, dw7 = 0;
> +   uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
>  
>     /* Initialize the execution mask with VMask.  Otherwise, derivatives are
>      * incorrect for subspans where some of the pixels are unlit.  We believe
> @@ -203,22 +203,24 @@ upload_ps_state(struct brw_context *brw)
>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
>           dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
> +         ksp0 = brw->wm.base.prog_offset;
> +         ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
>        } else {
>           dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
> +
> +         ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
>        }
>     } else {
>        dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
>        dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
>                GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
> +      ksp0 = brw->wm.base.prog_offset;
>     }
>  
>     BEGIN_BATCH(12);
>     OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
> -   if (brw->wm.prog_data->prog_offset_16 && min_invocations_per_fragment > 1)
> -      OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
> -   else
> -      OUT_BATCH(brw->wm.base.prog_offset);
> +   OUT_BATCH(ksp0);
>     OUT_BATCH(0);
>     OUT_BATCH(dw3);
>     if (brw->wm.prog_data->total_scratch) {
> @@ -233,7 +235,7 @@ upload_ps_state(struct brw_context *brw)
>     OUT_BATCH(dw7);
>     OUT_BATCH(0); /* kernel 1 pointer */
>     OUT_BATCH(0);
> -   OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
> +   OUT_BATCH(ksp2);
>     OUT_BATCH(0);
>     ADVANCE_BATCH();
>  }
> 



More information about the mesa-dev mailing list