[Mesa-dev] [PATCH 01/10] i965: Split sampler count variable to be per-stage.

Ian Romanick idr at freedesktop.org
Fri Aug 16 21:57:32 PDT 2013


On 08/14/2013 06:55 PM, Kenneth Graunke wrote:
> Currently, we only have a single sampler state table shared among all
> stages, so we just copy wm.sampler_count into vs.sampler_count.
>
> In the future, each shader stage will have its own SAMPLER_STATE table,
> at which point we'll need these separate sampler counts.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>   src/mesa/drivers/dri/i965/brw_context.h          |  5 ++++-
>   src/mesa/drivers/dri/i965/brw_vs_state.c         |  4 ++--
>   src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 12 +++++++-----
>   src/mesa/drivers/dri/i965/brw_wm_state.c         |  6 +++---
>   src/mesa/drivers/dri/i965/gen6_vs_state.c        |  2 +-
>   src/mesa/drivers/dri/i965/gen6_wm_state.c        |  2 +-
>   src/mesa/drivers/dri/i965/gen7_sampler_state.c   | 12 +++++++-----
>   src/mesa/drivers/dri/i965/gen7_vs_state.c        |  2 +-
>   src/mesa/drivers/dri/i965/gen7_wm_state.c        |  2 +-
>   9 files changed, 27 insertions(+), 20 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 74e38f1..63136b1 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1065,7 +1065,6 @@ struct brw_context
>
>      /** SAMPLER_STATE count and offset */
>      struct {
> -      GLuint count;
>         uint32_t offset;
>      } sampler;
>
> @@ -1109,6 +1108,8 @@ struct brw_context
>
>         uint32_t bind_bo_offset;
>         uint32_t surf_offset[BRW_MAX_VS_SURFACES];
> +
> +      uint32_t sampler_count;
>      } vs;

There's a lot of commonality between these per-stage structures.  If
BRW_MAX_VS_SURFACES, BRW_MAX_GS_SURFACES, and BRW_MAX_WM_SURFACES are
the same, we should factor the shared fields out into one common
structure instead of duplicating them for each stage.  Yeah?

>
>      struct {
> @@ -1182,6 +1183,8 @@ struct brw_context
>         uint32_t bind_bo_offset;
>         uint32_t surf_offset[BRW_MAX_WM_SURFACES];
>
> +      uint32_t sampler_count;
> +
>         struct {
>            struct ra_regs *regs;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
> index ddaf914..13aabac 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
> @@ -142,7 +142,7 @@ brw_upload_vs_unit(struct brw_context *brw)
>         vs->vs5.sampler_count = 0; /* hardware requirement */
>      else {
>         /* CACHE_NEW_SAMPLER */
> -      vs->vs5.sampler_count = (brw->sampler.count + 3) / 4;
> +      vs->vs5.sampler_count = (brw->vs.sampler_count + 3) / 4;
>      }
>
>
> @@ -155,7 +155,7 @@ brw_upload_vs_unit(struct brw_context *brw)
>
>      /* Set the sampler state pointer, and its reloc
>       */
> -   if (brw->sampler.count) {
> +   if (brw->vs.sampler_count) {
>         vs->vs5.sampler_state_pointer =
>            (brw->batch.bo->offset + brw->sampler.offset) >> 5;
>         drm_intel_bo_emit_reloc(brw->batch.bo,
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
> index 5457671..40a6d5b 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
> @@ -377,17 +377,19 @@ brw_upload_samplers(struct brw_context *brw)
>      /* ARB programs use the texture unit number as the sampler index, so we
>       * need to find the highest unit used.  A bit-count will not work.
>       */
> -   brw->sampler.count = _mesa_fls(SamplersUsed);
> +   brw->wm.sampler_count = _mesa_fls(SamplersUsed);
> +   /* Currently we only use one sampler state table.  Mirror the count. */
> +   brw->vs.sampler_count = brw->wm.sampler_count;
>
> -   if (brw->sampler.count == 0)
> +   if (brw->wm.sampler_count == 0)
>         return;
>
>      samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
> -			      brw->sampler.count * sizeof(*samplers),
> +			      brw->wm.sampler_count * sizeof(*samplers),
>   			      32, &brw->sampler.offset);
> -   memset(samplers, 0, brw->sampler.count * sizeof(*samplers));
> +   memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers));
>
> -   for (unsigned s = 0; s < brw->sampler.count; s++) {
> +   for (unsigned s = 0; s < brw->wm.sampler_count; s++) {
>         if (SamplersUsed & (1 << s)) {
>            const unsigned unit = (fs->SamplersUsed & (1 << s)) ?
>               fs->SamplerUnits[s] : vs->SamplerUnits[s];
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
> index 631f351..106d628 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
> @@ -144,10 +144,10 @@ brw_upload_wm_unit(struct brw_context *brw)
>         wm->wm4.sampler_count = 0; /* hardware requirement */
>      else {
>         /* CACHE_NEW_SAMPLER */
> -      wm->wm4.sampler_count = (brw->sampler.count + 1) / 4;
> +      wm->wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
>      }
>
> -   if (brw->sampler.count) {
> +   if (brw->wm.sampler_count) {
>         /* reloc */
>         wm->wm4.sampler_state_pointer = (brw->batch.bo->offset +
>   				       brw->sampler.offset) >> 5;
> @@ -225,7 +225,7 @@ brw_upload_wm_unit(struct brw_context *brw)
>      }
>
>      /* Emit sampler state relocation */
> -   if (brw->sampler.count != 0) {
> +   if (brw->wm.sampler_count != 0) {
>         drm_intel_bo_emit_reloc(brw->batch.bo,
>   			      brw->wm.state_offset +
>   			      offsetof(struct brw_wm_unit_state, wm4),
> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> index da20713..4af7cda 100644
> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> @@ -149,7 +149,7 @@ upload_vs_state(struct brw_context *brw)
>      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
>      OUT_BATCH(brw->vs.prog_offset);
>      OUT_BATCH(floating_point_mode |
> -	     ((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
> +	     ((ALIGN(brw->vs.sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
>
>      if (brw->vs.prog_data->base.total_scratch) {
>         OUT_RELOC(brw->vs.scratch_bo,
> diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
> index 24c96cc..e286785 100644
> --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
> @@ -140,7 +140,7 @@ upload_wm_state(struct brw_context *brw)
>         dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
>
>      /* CACHE_NEW_SAMPLER */
> -   dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
> +   dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
>      dw4 |= (brw->wm.prog_data->first_curbe_grf <<
>   	   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
>      dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
> diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
> index 7ce58ce..f09c6b3 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
> @@ -195,17 +195,19 @@ gen7_upload_samplers(struct brw_context *brw)
>
>      GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed;
>
> -   brw->sampler.count = _mesa_fls(SamplersUsed);
> +   brw->wm.sampler_count = _mesa_fls(SamplersUsed);
> +   /* Currently we only use one sampler state table.  Mirror the count. */
> +   brw->vs.sampler_count = brw->wm.sampler_count;
>
> -   if (brw->sampler.count == 0)
> +   if (brw->wm.sampler_count == 0)
>         return;
>
>      samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
> -			      brw->sampler.count * sizeof(*samplers),
> +			      brw->wm.sampler_count * sizeof(*samplers),
>   			      32, &brw->sampler.offset);
> -   memset(samplers, 0, brw->sampler.count * sizeof(*samplers));
> +   memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers));
>
> -   for (unsigned s = 0; s < brw->sampler.count; s++) {
> +   for (unsigned s = 0; s < brw->wm.sampler_count; s++) {
>         if (SamplersUsed & (1 << s)) {
>            const unsigned unit = (fs->SamplersUsed & (1 << s)) ?
>               fs->SamplerUnits[s] : vs->SamplerUnits[s];
> diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
> index 0340da4..634bd95 100644
> --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
> @@ -89,7 +89,7 @@ upload_vs_state(struct brw_context *brw)
>      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
>      OUT_BATCH(brw->vs.prog_offset);
>      OUT_BATCH(floating_point_mode |
> -	     ((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
> +	     ((ALIGN(brw->vs.sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
>
>      if (brw->vs.prog_data->base.total_scratch) {
>         OUT_RELOC(brw->vs.scratch_bo,
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> index 1bc6e2e..d079a52 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> @@ -162,7 +162,7 @@ upload_ps_state(struct brw_context *brw)
>      dw2 = dw4 = dw5 = 0;
>
>      /* CACHE_NEW_SAMPLER */
> -   dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
> +   dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
>
>      /* Use ALT floating point mode for ARB fragment programs, because they
>       * require 0^0 == 1.  Even though _CurrentFragmentProgram is used for
>



More information about the mesa-dev mailing list