Mesa (master): ilo: let shaders determine sampler counts

Chia-I Wu olv at kemper.freedesktop.org
Sat Oct 4 16:22:50 UTC 2014


Module: Mesa
Branch: master
Commit: ca824e69403a32144328d1fb7987d0537e88ee04
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ca824e69403a32144328d1fb7987d0537e88ee04

Author: Chia-I Wu <olvaffe at gmail.com>
Date:   Sat Oct  4 10:51:20 2014 +0800

ilo: let shaders determine sampler counts

When a shader needs N samplers, we should upload N samplers and not depend on
how many are bound.

Signed-off-by: Chia-I Wu <olvaffe at gmail.com>

---

 src/gallium/drivers/ilo/ilo_blitter_pipe.c      |    8 ++---
 src/gallium/drivers/ilo/ilo_builder.h           |   17 ++++++++++
 src/gallium/drivers/ilo/ilo_builder_3d_bottom.h |    7 +---
 src/gallium/drivers/ilo/ilo_builder_3d_top.h    |   27 +++++++++-------
 src/gallium/drivers/ilo/ilo_render_dynamic.c    |   39 ++++++++++++++---------
 src/gallium/drivers/ilo/ilo_render_gen6.c       |   19 ++++-------
 src/gallium/drivers/ilo/ilo_render_gen7.c       |   35 ++++++++------------
 src/gallium/drivers/ilo/ilo_shader.c            |    3 ++
 src/gallium/drivers/ilo/ilo_shader.h            |    1 +
 src/gallium/drivers/ilo/ilo_state.c             |   12 -------
 src/gallium/drivers/ilo/ilo_state.h             |    1 -
 src/gallium/drivers/ilo/ilo_state_gen6.c        |    8 +++--
 src/gallium/drivers/ilo/ilo_state_gen7.c        |    8 +++--
 13 files changed, 98 insertions(+), 87 deletions(-)

diff --git a/src/gallium/drivers/ilo/ilo_blitter_pipe.c b/src/gallium/drivers/ilo/ilo_blitter_pipe.c
index d9d50ea..c4c02bd 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_pipe.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_pipe.c
@@ -69,12 +69,12 @@ ilo_blitter_pipe_begin(struct ilo_blitter *blitter,
    case ILO_BLITTER_PIPE_BLIT:
    case ILO_BLITTER_PIPE_COPY:
       /*
-       * we are about to call util_blitter_blit() or
-       * util_blitter_copy_texture()
+       * We are about to call util_blitter_blit() or
+       * util_blitter_copy_texture().  Note that util_blitter uses at most two
+       * textures.
        */
       util_blitter_save_fragment_sampler_states(b,
-            vec->sampler[PIPE_SHADER_FRAGMENT].count,
-            (void **) vec->sampler[PIPE_SHADER_FRAGMENT].cso);
+            2, (void **) vec->sampler[PIPE_SHADER_FRAGMENT].cso);
 
       util_blitter_save_fragment_sampler_views(b,
             vec->view[PIPE_SHADER_FRAGMENT].count,
diff --git a/src/gallium/drivers/ilo/ilo_builder.h b/src/gallium/drivers/ilo/ilo_builder.h
index 38b1c83..102f11a 100644
--- a/src/gallium/drivers/ilo/ilo_builder.h
+++ b/src/gallium/drivers/ilo/ilo_builder.h
@@ -312,6 +312,23 @@ ilo_builder_dynamic_write(struct ilo_builder *builder,
    return offset;
 }
 
+/**
+ * Ensure at least (len << 2) is reserved from the top (for prefetches).
+ */
+static inline void
+ilo_builder_dynamic_pad_top(struct ilo_builder *builder, unsigned len)
+{
+   const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH;
+   const unsigned size = len << 2; /* len * 4; presumably dwords to bytes */
+   struct ilo_builder_writer *writer = &builder->writers[which];
+
+   if (writer->stolen < size) { /* request only the shortfall */
+      ilo_builder_writer_reserve_top(builder, which,
+            1, size - writer->stolen);
+      writer->stolen = size; /* remember how much has been padded so far */
+   }
+}
+
 static inline unsigned
 ilo_builder_dynamic_used(const struct ilo_builder *builder)
 {
diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h
index 73b5fae..456a494 100644
--- a/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h
@@ -273,7 +273,6 @@ gen7_3DSTATE_SBE(struct ilo_builder *builder,
 static inline void
 gen6_3DSTATE_WM(struct ilo_builder *builder,
                 const struct ilo_shader_state *fs,
-                int num_samplers,
                 const struct ilo_rasterizer_state *rasterizer,
                 bool dual_blend, bool cc_may_kill,
                 uint32_t hiz_op)
@@ -311,8 +310,6 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
    dw5 = fs_cso->payload[2];
    dw6 = fs_cso->payload[3];
 
-   dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 248:
     *
@@ -399,7 +396,7 @@ gen7_3DSTATE_WM(struct ilo_builder *builder,
 static inline void
 gen7_3DSTATE_PS(struct ilo_builder *builder,
                 const struct ilo_shader_state *fs,
-                int num_samplers, bool dual_blend)
+                bool dual_blend)
 {
    const uint8_t cmd_len = 8;
    const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
@@ -446,8 +443,6 @@ gen7_3DSTATE_PS(struct ilo_builder *builder,
    dw4 = cso->payload[1];
    dw5 = cso->payload[2];
 
-   dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
    if (dual_blend)
       dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
 
diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/ilo_builder_3d_top.h
index 4bcab8c..6d6fa0e 100644
--- a/src/gallium/drivers/ilo/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/ilo_builder_3d_top.h
@@ -544,8 +544,7 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
 
 static inline void
 gen6_3DSTATE_VS(struct ilo_builder *builder,
-                const struct ilo_shader_state *vs,
-                int num_samplers)
+                const struct ilo_shader_state *vs)
 {
    const uint8_t cmd_len = 6;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
@@ -571,8 +570,6 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
    dw4 = cso->payload[1];
    dw5 = cso->payload[2];
 
-   dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
    dw[0] = dw0;
    dw[1] = ilo_shader_get_kernel_offset(vs);
@@ -584,8 +581,7 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
 
 static inline void
 gen7_3DSTATE_HS(struct ilo_builder *builder,
-                const struct ilo_shader_state *hs,
-                int num_samplers)
+                const struct ilo_shader_state *hs)
 {
    const uint8_t cmd_len = 7;
    uint32_t *dw;
@@ -623,8 +619,7 @@ gen7_3DSTATE_TE(struct ilo_builder *builder)
 
 static inline void
 gen7_3DSTATE_DS(struct ilo_builder *builder,
-                const struct ilo_shader_state *ds,
-                int num_samplers)
+                const struct ilo_shader_state *ds)
 {
    const uint8_t cmd_len = 6;
    uint32_t *dw;
@@ -736,8 +731,7 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
 
 static inline void
 gen7_3DSTATE_GS(struct ilo_builder *builder,
-                const struct ilo_shader_state *gs,
-                int num_samplers)
+                const struct ilo_shader_state *gs)
 {
    const uint8_t cmd_len = 7;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
@@ -763,8 +757,6 @@ gen7_3DSTATE_GS(struct ilo_builder *builder,
    dw4 = cso->payload[1];
    dw5 = cso->payload[2];
 
-   dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = dw0;
@@ -1423,6 +1415,17 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder,
    if (!num_samplers)
       return 0;
 
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 132:
+    *
+    *     "(Sampler Count of 3DSTATE_VS) Specifies how many samplers (in
+    *      multiples of 4) the vertex shader 0 kernel uses. Used only for
+    *      prefetching the associated sampler state entries."
+    *
+    * It also applies to other shader stages.
+    */
+   ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4)));
+
    state_offset = ilo_builder_dynamic_pointer(builder,
          ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);
 
diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c
index dfd29c3..5c36873 100644
--- a/src/gallium/drivers/ilo/ilo_render_dynamic.c
+++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c
@@ -134,9 +134,8 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r,
       vec->sampler[shader_type].cso;
    const struct pipe_sampler_view * const *views =
       (const struct pipe_sampler_view **) vec->view[shader_type].states;
-   const int num_samplers = vec->sampler[shader_type].count;
-   const int num_views = vec->view[shader_type].count;
    uint32_t *sampler_state, *border_color_state;
+   int sampler_count;
    bool emit_border_color = false;
    bool skip = false;
 
@@ -145,26 +144,32 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r,
    /* SAMPLER_BORDER_COLOR_STATE and SAMPLER_STATE */
    switch (shader_type) {
    case PIPE_SHADER_VERTEX:
-      if (DIRTY(SAMPLER_VS) || DIRTY(VIEW_VS)) {
+      if (DIRTY(VS) || DIRTY(SAMPLER_VS) || DIRTY(VIEW_VS)) {
          sampler_state = &r->state.vs.SAMPLER_STATE;
          border_color_state = r->state.vs.SAMPLER_BORDER_COLOR_STATE;
 
-         if (DIRTY(SAMPLER_VS))
+         if (DIRTY(VS) || DIRTY(SAMPLER_VS))
             emit_border_color = true;
 
+         sampler_count = (vec->vs) ? ilo_shader_get_kernel_param(vec->vs,
+               ILO_KERNEL_SAMPLER_COUNT) : 0;
+
          session->sampler_vs_changed = true;
       } else {
          skip = true;
       }
       break;
    case PIPE_SHADER_FRAGMENT:
-      if (DIRTY(SAMPLER_FS) || DIRTY(VIEW_FS)) {
+      if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(VIEW_FS)) {
          sampler_state = &r->state.wm.SAMPLER_STATE;
          border_color_state = r->state.wm.SAMPLER_BORDER_COLOR_STATE;
 
-         if (DIRTY(SAMPLER_FS))
+         if (DIRTY(VS) || DIRTY(SAMPLER_FS))
             emit_border_color = true;
 
+         sampler_count = (vec->fs) ? ilo_shader_get_kernel_param(vec->fs,
+               ILO_KERNEL_SAMPLER_COUNT) : 0;
+
          session->sampler_fs_changed = true;
       } else {
          skip = true;
@@ -178,20 +183,20 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r,
    if (skip)
       return;
 
+   assert(sampler_count <= Elements(vec->view[shader_type].states) &&
+          sampler_count <= Elements(vec->sampler[shader_type].cso));
+
    if (emit_border_color) {
       int i;
 
-      for (i = 0; i < num_samplers; i++) {
+      for (i = 0; i < sampler_count; i++) {
          border_color_state[i] = (samplers[i]) ?
             gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0;
       }
    }
 
-   /* should we take the minimum of num_samplers and num_views? */
    *sampler_state = gen6_SAMPLER_STATE(r->builder,
-         samplers, views,
-         border_color_state,
-         MIN2(num_samplers, num_views));
+         samplers, views, border_color_state, sampler_count);
 }
 
 static void
@@ -322,14 +327,13 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render,
 
    for (sh_type = 0; sh_type < PIPE_SHADER_TYPES; sh_type++) {
       const int alignment = 32 / 4;
-      int num_samplers, pcb_len;
-
-      num_samplers = vec->sampler[sh_type].count;
-      pcb_len = 0;
+      int num_samplers = 0, pcb_len = 0;
 
       switch (sh_type) {
       case PIPE_SHADER_VERTEX:
          if (vec->vs) {
+            num_samplers = ilo_shader_get_kernel_param(vec->vs,
+                  ILO_KERNEL_SAMPLER_COUNT);
             pcb_len = ilo_shader_get_kernel_param(vec->vs,
                   ILO_KERNEL_PCB_CBUF0_SIZE);
             pcb_len += ilo_shader_get_kernel_param(vec->vs,
@@ -340,6 +344,8 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render,
          break;
       case PIPE_SHADER_FRAGMENT:
          if (vec->fs) {
+            num_samplers = ilo_shader_get_kernel_param(vec->fs,
+                  ILO_KERNEL_SAMPLER_COUNT);
             pcb_len = ilo_shader_get_kernel_param(vec->fs,
                   ILO_KERNEL_PCB_CBUF0_SIZE);
          }
@@ -350,6 +356,9 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render,
 
       /* SAMPLER_STATE array and SAMPLER_BORDER_COLORs */
       if (num_samplers) {
+         /* prefetches are done in multiples of 4 */
+         num_samplers = align(num_samplers, 4);
+
          len += align(GEN6_SAMPLER_STATE__SIZE * num_samplers, alignment) +
             align(GEN6_SAMPLER_BORDER_COLOR__SIZE, alignment) * num_samplers;
       }
diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c
index 389b596..4221f69 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen6.c
@@ -496,8 +496,7 @@ gen6_draw_vs(struct ilo_render *r,
              const struct ilo_state_vector *vec,
              struct ilo_render_draw_session *session)
 {
-   const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) ||
-                                 r->instruction_bo_changed);
+   const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed);
    const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
 
    /*
@@ -516,11 +515,8 @@ gen6_draw_vs(struct ilo_render *r,
    }
 
    /* 3DSTATE_VS */
-   if (emit_3dstate_vs) {
-      const int num_samplers = vec->sampler[PIPE_SHADER_VERTEX].count;
-
-      gen6_3DSTATE_VS(r->builder, vec->vs, num_samplers);
-   }
+   if (emit_3dstate_vs)
+      gen6_3DSTATE_VS(r->builder, vec->vs);
 
    if (emit_3dstate_constant_vs && ilo_dev_gen(r->dev) == ILO_GEN(6))
       gen6_wa_post_3dstate_constant_vs(r);
@@ -692,9 +688,8 @@ gen6_draw_wm(struct ilo_render *r,
    }
 
    /* 3DSTATE_WM */
-   if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || DIRTY(DSA) ||
+   if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) ||
        DIRTY(RASTERIZER) || r->instruction_bo_changed) {
-      const int num_samplers = vec->sampler[PIPE_SHADER_FRAGMENT].count;
       const bool dual_blend = vec->blend->dual_blend;
       const bool cc_may_kill = (vec->dsa->dw_alpha ||
                                 vec->blend->alpha_to_coverage);
@@ -702,7 +697,7 @@ gen6_draw_wm(struct ilo_render *r,
       if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed)
          gen6_wa_pre_3dstate_wm_max_threads(r);
 
-      gen6_3DSTATE_WM(r->builder, vec->fs, num_samplers,
+      gen6_3DSTATE_WM(r->builder, vec->fs,
             vec->rasterizer, dual_blend, cc_may_kill, 0);
    }
 }
@@ -849,7 +844,7 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
                        const struct ilo_blitter *blitter)
 {
    gen6_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
-   gen6_3DSTATE_VS(r->builder, NULL, 0);
+   gen6_3DSTATE_VS(r->builder, NULL);
 
    gen6_wa_post_3dstate_constant_vs(r);
 
@@ -884,7 +879,7 @@ gen6_rectlist_wm(struct ilo_render *r,
    gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen6_wa_pre_3dstate_wm_max_threads(r);
-   gen6_3DSTATE_WM(r->builder, NULL, 0, NULL, false, false, hiz_op);
+   gen6_3DSTATE_WM(r->builder, NULL, NULL, false, false, hiz_op);
 }
 
 static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 9aefc6f..e0e6d06 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -338,8 +338,7 @@ gen7_draw_vs(struct ilo_render *r,
       session->sampler_vs_changed;
    /* see gen6_draw_vs() */
    const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
-   const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) ||
-           r->instruction_bo_changed);
+   const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed);
 
    /* emit depth stall before any of the VS commands */
    if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
@@ -367,11 +366,8 @@ gen7_draw_vs(struct ilo_render *r,
    }
 
    /* 3DSTATE_VS */
-   if (emit_3dstate_vs) {
-      const int num_samplers = vec->sampler[PIPE_SHADER_VERTEX].count;
-
-      gen6_3DSTATE_VS(r->builder, vec->vs, num_samplers);
-   }
+   if (emit_3dstate_vs)
+      gen6_3DSTATE_VS(r->builder, vec->vs);
 }
 
 static void
@@ -382,7 +378,7 @@ gen7_draw_hs(struct ilo_render *r,
    /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
    if (r->hw_ctx_changed) {
       gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
-      gen7_3DSTATE_HS(r->builder, NULL, 0);
+      gen7_3DSTATE_HS(r->builder, NULL);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@@ -408,7 +404,7 @@ gen7_draw_ds(struct ilo_render *r,
    /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
    if (r->hw_ctx_changed) {
       gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
-      gen7_3DSTATE_DS(r->builder, NULL, 0);
+      gen7_3DSTATE_DS(r->builder, NULL);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@@ -425,7 +421,7 @@ gen7_draw_gs(struct ilo_render *r,
    /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
    if (r->hw_ctx_changed) {
       gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
-      gen7_3DSTATE_GS(r->builder, NULL, 0);
+      gen7_3DSTATE_GS(r->builder, NULL);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@@ -541,9 +537,7 @@ gen7_draw_wm(struct ilo_render *r,
    }
 
    /* 3DSTATE_PS */
-   if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) ||
-       r->instruction_bo_changed) {
-      const int num_samplers = vec->sampler[PIPE_SHADER_FRAGMENT].count;
+   if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) {
       const bool dual_blend = vec->blend->dual_blend;
 
       if ((ilo_dev_gen(r->dev) == ILO_GEN(7) ||
@@ -551,7 +545,7 @@ gen7_draw_wm(struct ilo_render *r,
           r->hw_ctx_changed)
          gen7_wa_pre_3dstate_ps_max_threads(r);
 
-      gen7_3DSTATE_PS(r->builder, vec->fs, num_samplers, dual_blend);
+      gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend);
    }
 
    /* 3DSTATE_SCISSOR_STATE_POINTERS */
@@ -562,8 +556,7 @@ gen7_draw_wm(struct ilo_render *r,
 
    /* XXX what is the best way to know if this workaround is needed? */
    {
-      const bool emit_3dstate_ps =
-         (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND));
+      const bool emit_3dstate_ps = (DIRTY(FS) || DIRTY(BLEND));
       const bool emit_3dstate_depth_buffer =
          (DIRTY(FB) || DIRTY(DSA) || r->state_bo_changed);
 
@@ -729,18 +722,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
                        const struct ilo_blitter *blitter)
 {
    gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
-   gen6_3DSTATE_VS(r->builder, NULL, 0);
+   gen6_3DSTATE_VS(r->builder, NULL);
 
    gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
-   gen7_3DSTATE_HS(r->builder, NULL, 0);
+   gen7_3DSTATE_HS(r->builder, NULL);
 
    gen7_3DSTATE_TE(r->builder);
 
    gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
-   gen7_3DSTATE_DS(r->builder, NULL, 0);
+   gen7_3DSTATE_DS(r->builder, NULL);
 
    gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
-   gen7_3DSTATE_GS(r->builder, NULL, 0);
+   gen7_3DSTATE_GS(r->builder, NULL);
 
    gen7_3DSTATE_STREAMOUT(r->builder, 0x0, 0, false);
 
@@ -778,7 +771,7 @@ gen7_rectlist_wm(struct ilo_render *r,
    gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen7_wa_pre_3dstate_ps_max_threads(r);
-   gen7_3DSTATE_PS(r->builder, NULL, 0, false);
+   gen7_3DSTATE_PS(r->builder, NULL, false);
 }
 
 static void
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index b43fce7..fdbd2b8 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -1000,6 +1000,9 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
    case ILO_KERNEL_OUTPUT_COUNT:
       val = kernel->out.count;
       break;
+   case ILO_KERNEL_SAMPLER_COUNT:
+      val = shader->info.num_samplers;
+      break;
    case ILO_KERNEL_URB_DATA_START_REG:
       val = kernel->in.start_grf;
       break;
diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h
index 80e9c19..77deee9 100644
--- a/src/gallium/drivers/ilo/ilo_shader.h
+++ b/src/gallium/drivers/ilo/ilo_shader.h
@@ -33,6 +33,7 @@
 enum ilo_kernel_param {
    ILO_KERNEL_INPUT_COUNT,
    ILO_KERNEL_OUTPUT_COUNT,
+   ILO_KERNEL_SAMPLER_COUNT,
    ILO_KERNEL_URB_DATA_START_REG,
    ILO_KERNEL_SKIP_CBUF0_UPLOAD,
    ILO_KERNEL_PCB_CBUF0_SIZE,
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 6177ac0..18c1566 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -338,18 +338,6 @@ ilo_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
          dst->cso[start + i] = NULL;
    }
 
-   if (dst->count <= start + count) {
-      if (samplers)
-         count += start;
-      else
-         count = start;
-
-      while (count > 0 && !dst->cso[count - 1])
-         count--;
-
-      dst->count = count;
-   }
-
    if (changed) {
       switch (shader) {
       case PIPE_SHADER_VERTEX:
diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h
index 3f3c495..c371716 100644
--- a/src/gallium/drivers/ilo/ilo_state.h
+++ b/src/gallium/drivers/ilo/ilo_state.h
@@ -292,7 +292,6 @@ struct ilo_sampler_cso {
 
 struct ilo_sampler_state {
    const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
-   unsigned count;
 };
 
 struct ilo_view_surface {
diff --git a/src/gallium/drivers/ilo/ilo_state_gen6.c b/src/gallium/drivers/ilo/ilo_state_gen6.c
index 0192c60..6f0c92d 100644
--- a/src/gallium/drivers/ilo/ilo_state_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_state_gen6.c
@@ -411,13 +411,14 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *vs,
                     struct ilo_shader_cso *cso)
 {
-   int start_grf, vue_read_len, max_threads;
+   int start_grf, vue_read_len, sampler_count, max_threads;
    uint32_t dw2, dw4, dw5;
 
    ILO_DEV_ASSERT(dev, 6, 7.5);
 
    start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
    vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
+   sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
 
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 135:
@@ -464,6 +465,7 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
    }
 
    dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
 
    dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
          vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
@@ -933,13 +935,14 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
                          const struct ilo_shader_state *fs,
                          struct ilo_shader_cso *cso)
 {
-   int start_grf, input_count, interps, max_threads;
+   int start_grf, input_count, sampler_count, interps, max_threads;
    uint32_t dw2, dw4, dw5, dw6;
 
    ILO_DEV_ASSERT(dev, 6, 6);
 
    start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
    input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
    interps = ilo_shader_get_kernel_param(fs,
          ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
 
@@ -947,6 +950,7 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
    max_threads = (dev->gt == 2) ? 80 : 40;
 
    dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
 
    dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
          0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
diff --git a/src/gallium/drivers/ilo/ilo_state_gen7.c b/src/gallium/drivers/ilo/ilo_state_gen7.c
index 53c3aeb..f91a088 100644
--- a/src/gallium/drivers/ilo/ilo_state_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_state_gen7.c
@@ -39,13 +39,14 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
                          const struct ilo_shader_state *gs,
                          struct ilo_shader_cso *cso)
 {
-   int start_grf, vue_read_len, max_threads;
+   int start_grf, vue_read_len, sampler_count, max_threads;
    uint32_t dw2, dw4, dw5;
 
    ILO_DEV_ASSERT(dev, 7, 7.5);
 
    start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
    vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
+   sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
 
    /* in pairs */
    vue_read_len = (vue_read_len + 1) / 2;
@@ -63,6 +64,7 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
    }
 
    dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
 
    dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
          GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
@@ -131,15 +133,17 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
                          const struct ilo_shader_state *fs,
                          struct ilo_shader_cso *cso)
 {
-   int start_grf, max_threads;
+   int start_grf, sampler_count, max_threads;
    uint32_t dw2, dw4, dw5;
    uint32_t wm_interps, wm_dw1;
 
    ILO_DEV_ASSERT(dev, 7, 7.5);
 
    start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
 
    dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
 
    dw4 = GEN7_PS_DW4_POSOFFSET_NONE;
 




More information about the mesa-commit mailing list