Mesa (master): ilo: fix scratch space setup in core

Chia-I Wu olv at kemper.freedesktop.org
Fri Oct 23 09:30:29 UTC 2015


Module: Mesa
Branch: master
Commit: 4a7d18296a9e80d2c5458bf77f8eb88913433c90
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a7d18296a9e80d2c5458bf77f8eb88913433c90

Author: Chia-I Wu <olvaffe at gmail.com>
Date:   Fri Oct 23 00:24:26 2015 +0800

ilo: fix scratch space setup in core

Move scratch_size out of ilo_state_shader_kernel_info and
ilo_state_compute_interface_info.  A scratch space is shared by all
kernels/interfaces.  Update builder to emit relocs for scratch bos.

---

 .../drivers/ilo/core/ilo_builder_3d_bottom.h       |   33 +++++--
 src/gallium/drivers/ilo/core/ilo_builder_3d_top.h  |   99 ++++++++++++++++----
 src/gallium/drivers/ilo/core/ilo_state_compute.c   |   95 +++++++++++++------
 src/gallium/drivers/ilo/core/ilo_state_compute.h   |   12 ++-
 src/gallium/drivers/ilo/core/ilo_state_shader.c    |   74 ++++++++++-----
 src/gallium/drivers/ilo/core/ilo_state_shader.h    |   43 ++++++++-
 src/gallium/drivers/ilo/core/ilo_state_shader_ps.c |   52 +++++-----
 src/gallium/drivers/ilo/ilo_render_gen6.c          |   18 ++--
 src/gallium/drivers/ilo/ilo_render_gen7.c          |   28 +++---
 src/gallium/drivers/ilo/ilo_render_gen8.c          |    2 +-
 src/gallium/drivers/ilo/ilo_shader.c               |    4 +-
 11 files changed, 327 insertions(+), 133 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 5efe9da..2e9470e 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -202,14 +202,16 @@ static inline void
 gen6_3DSTATE_WM(struct ilo_builder *builder,
                 const struct ilo_state_raster *rs,
                 const struct ilo_state_ps *ps,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 9;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 6, 6);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
    dw[1] = kernel_offset;
@@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
    dw[6] = rs->wm[2] | ps->ps[4];
    dw[7] = 0; /* kernel 1 */
    dw[8] = 0; /* kernel 2 */
+
+   if (ilo_state_ps_get_scratch_size(ps)) {
+      ilo_builder_batch_reloc(builder, pos + 2, scratch_bo,
+            ps->ps[0], 0);
+   }
 }
 
 static inline void
@@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
 static inline void
 gen7_3DSTATE_PS(struct ilo_builder *builder,
                 const struct ilo_state_ps *ps,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 8;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
    dw[1] = kernel_offset;
@@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder,
    dw[5] = ps->ps[5];
    dw[6] = 0; /* kernel 1 */
    dw[7] = 0; /* kernel 2 */
+
+   if (ilo_state_ps_get_scratch_size(ps)) {
+      ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+            ps->ps[3], 0);
+   }
 }
 
 static inline void
 gen8_3DSTATE_PS(struct ilo_builder *builder,
                 const struct ilo_state_ps *ps,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 12;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
    dw[1] = kernel_offset;
@@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder,
    dw[9] = 0;
    dw[10] = 0; /* kernel 2 */
    dw[11] = 0;
+
+   if (ilo_state_ps_get_scratch_size(ps)) {
+      ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+            ps->ps[1], 0);
+   }
 }
 
 static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index 6e94fb2..3a44871 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
 static inline void
 gen6_3DSTATE_VS(struct ilo_builder *builder,
                 const struct ilo_state_vs *vs,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 6;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
    dw[1] = kernel_offset;
@@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
    dw[3] = vs->vs[1];
    dw[4] = vs->vs[2];
    dw[5] = vs->vs[3];
+
+   if (ilo_state_vs_get_scratch_size(vs)) {
+      ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+            vs->vs[1], 0);
+   }
 }
 
 static inline void
 gen8_3DSTATE_VS(struct ilo_builder *builder,
                 const struct ilo_state_vs *vs,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 9;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
    dw[1] = kernel_offset;
@@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder,
    dw[6] = vs->vs[2];
    dw[7] = vs->vs[3];
    dw[8] = vs->vs[4];
+
+   if (ilo_state_vs_get_scratch_size(vs)) {
+      ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+            vs->vs[1], 0);
+   }
 }
 
 static inline void
 gen7_3DSTATE_HS(struct ilo_builder *builder,
                 const struct ilo_state_hs *hs,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 7;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
    /* see hs_set_gen7_3DSTATE_HS() */
@@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder,
    dw[4] = hs->hs[2];
    dw[5] = hs->hs[3];
    dw[6] = 0;
+
+   if (ilo_state_hs_get_scratch_size(hs)) {
+      ilo_builder_batch_reloc(builder, pos + 4, scratch_bo,
+            hs->hs[2], 0);
+   }
 }
 
 static inline void
 gen8_3DSTATE_HS(struct ilo_builder *builder,
                 const struct ilo_state_hs *hs,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 9;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
    /* see hs_set_gen7_3DSTATE_HS() */
@@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder,
    dw[6] = 0;
    dw[7] = hs->hs[3];
    dw[8] = 0;
+
+   if (ilo_state_hs_get_scratch_size(hs)) {
+      ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo,
+            hs->hs[2], 0);
+   }
 }
 
 static inline void
@@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder,
 static inline void
 gen7_3DSTATE_DS(struct ilo_builder *builder,
                 const struct ilo_state_ds *ds,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 6;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
    /* see ds_set_gen7_3DSTATE_DS() */
@@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder,
    dw[3] = ds->ds[1];
    dw[4] = ds->ds[2];
    dw[5] = ds->ds[3];
+
+   if (ilo_state_ds_get_scratch_size(ds)) {
+      ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+            ds->ds[1], 0);
+   }
 }
 
 static inline void
 gen8_3DSTATE_DS(struct ilo_builder *builder,
                 const struct ilo_state_ds *ds,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 9;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
    /* see ds_set_gen7_3DSTATE_DS() */
@@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder,
    dw[6] = ds->ds[2];
    dw[7] = ds->ds[3];
    dw[8] = ds->ds[4];
+
+   if (ilo_state_ds_get_scratch_size(ds)) {
+      ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+            ds->ds[1], 0);
+   }
 }
 
 static inline void
 gen6_3DSTATE_GS(struct ilo_builder *builder,
                 const struct ilo_state_gs *gs,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 7;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 6, 6);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
    dw[1] = kernel_offset;
@@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder,
    dw[4] = gs->gs[2];
    dw[5] = gs->gs[3];
    dw[6] = gs->gs[4];
+
+   if (ilo_state_gs_get_scratch_size(gs)) {
+      ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+            gs->gs[1], 0);
+   }
 }
 
 static inline void
@@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
 static inline void
 gen7_3DSTATE_GS(struct ilo_builder *builder,
                 const struct ilo_state_gs *gs,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 7;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
    dw[1] = kernel_offset;
@@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder,
    dw[4] = gs->gs[2];
    dw[5] = gs->gs[3];
    dw[6] = 0;
+
+   if (ilo_state_gs_get_scratch_size(gs)) {
+      ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+            gs->gs[1], 0);
+   }
 }
 
 static inline void
 gen8_3DSTATE_GS(struct ilo_builder *builder,
                 const struct ilo_state_gs *gs,
-                uint32_t kernel_offset)
+                uint32_t kernel_offset,
+                struct intel_bo *scratch_bo)
 {
    const uint8_t cmd_len = 10;
    uint32_t *dw;
+   unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
    dw[1] = kernel_offset;
@@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder,
    dw[7] = gs->gs[3];
    dw[8] = 0;
    dw[9] = gs->gs[4];
+
+   if (ilo_state_gs_get_scratch_size(gs)) {
+      ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+            gs->gs[1], 0);
+   }
 }
 
 static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c
index a5fe5e1..ba3ff90 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_compute.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c
@@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
     */
    assert(per_thread_read <= 63);
 
-   /* From the Haswell PRM, volume 2d, page 199:
+   /*
+    * From the Haswell PRM, volume 2d, page 199:
     *
     *     "(Cross-Thread Constant Data Read Length) [0,127]"
     */
@@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev,
    return true;
 }
 
-static uint8_t
-compute_get_gen6_scratch_space(const struct ilo_dev *dev,
-                               const struct ilo_state_compute_info *info)
+static uint32_t
+compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev,
+                                         const struct ilo_state_compute_info *info,
+                                         uint8_t *per_thread_space)
 {
-   uint32_t scratch_size = 0;
-   uint8_t i;
+   ILO_DEV_ASSERT(dev, 6, 7);
 
-   ILO_DEV_ASSERT(dev, 6, 8);
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 2, page 30:
+    *
+    *     "(Per Thread Scratch Space)
+    *      Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]"
+    */
+   assert(info->per_thread_scratch_size <= 12 * 1024);
 
-   for (i = 0; i < info->interface_count; i++) {
-      if (scratch_size < info->interfaces[i].scratch_size)
-         scratch_size = info->interfaces[i].scratch_size;
+   if (!info->per_thread_scratch_size) {
+      *per_thread_space = 0;
+      return 0;
    }
 
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      assert(scratch_size <= 2 * 1024 * 1024);
+   *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+      (info->per_thread_scratch_size - 1) / 1024 : 0;
+
+   return 1024 * (1 + *per_thread_space);
+}
 
-      /* next power of two, starting from 1KB */
-      return (scratch_size > 1024) ?
-         (util_last_bit(scratch_size - 1) - 10): 0;
-   } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
-      assert(scratch_size <= 2 * 1024 * 1024);
+static uint32_t
+compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev,
+                                          const struct ilo_state_compute_info *info,
+                                          uint8_t *per_thread_space)
+{
+   ILO_DEV_ASSERT(dev, 7.5, 8);
 
-      /* next power of two, starting from 2KB */
-      return (scratch_size > 2048) ?
-         (util_last_bit(scratch_size - 1) - 11): 0;
-   } else {
-      assert(scratch_size <= 12 * 1024);
+   /*
+    * From the Haswell PRM, volume 2b, page 407:
+    *
+    *     "(Per Thread Scratch Space)
+    *      [0,10]  Indicating [2k bytes, 2 Mbytes]"
+    *
+    *     "Note: The scratch space should be declared as 2x the desired
+    *      scratch space. The stack will start at the half-way point instead
+    *      of the end. The upper half of scratch space will not be accessed
+    *      and so does not have to be allocated in memory."
+    *
+    * From the Broadwell PRM, volume 2a, page 450:
+    *
+    *     "(Per Thread Scratch Space)
+    *      [0,11]  indicating [1k bytes, 2 Mbytes]"
+    */
+   assert(info->per_thread_scratch_size <=
+         ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024);
 
-      return (scratch_size > 1024) ?
-         (scratch_size - 1) / 1024 : 0;
+   if (!info->per_thread_scratch_size) {
+      *per_thread_space = 0;
+      return 0;
    }
+
+   /* next power of two, starting from 1KB */
+   *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+      (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+
+   return 1 << (10 + *per_thread_space);
 }
 
 static bool
@@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
                                  const struct ilo_state_compute_info *info)
 {
    struct compute_urb_configuration urb;
-   uint8_t scratch_space;
+   uint32_t per_thread_size;
+   uint8_t per_thread_space;
 
    uint32_t dw1, dw2, dw4;
 
@@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
        !compute_validate_gen6(dev, info, &urb))
       return false;
 
-   scratch_space = compute_get_gen6_scratch_space(dev, info);
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      per_thread_size = compute_get_gen75_per_thread_scratch_size(dev,
+            info, &per_thread_space);
+   } else {
+      per_thread_size = compute_get_gen6_per_thread_scratch_size(dev,
+            info, &per_thread_space);
+   }
+
+   dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
 
-   dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
    dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
          urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
          GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
@@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
    compute->vfe[1] = dw2;
    compute->vfe[2] = dw4;
 
+   compute->scratch_size = per_thread_size * dev->thread_count;
+
    return true;
 }
 
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h
index 346f7b6..bd56bba 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_compute.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h
@@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info {
    /* usually 0 unless there are multiple interfaces */
    uint32_t kernel_offset;
 
-   uint32_t scratch_size;
-
    uint8_t sampler_count;
    uint8_t surface_count;
 
@@ -65,6 +63,8 @@ struct ilo_state_compute_info {
    const struct ilo_state_compute_interface_info *interfaces;
    uint8_t interface_count;
 
+   uint32_t per_thread_scratch_size;
+
    uint32_t cv_urb_alloc_size;
    uint32_t curbe_alloc_size;
 };
@@ -74,6 +74,8 @@ struct ilo_state_compute {
 
    uint32_t (*idrt)[6];
    uint8_t idrt_count;
+
+   uint32_t scratch_size;
 };
 
 static inline size_t
@@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute,
                        const struct ilo_dev *dev,
                        const struct ilo_state_compute_info *info);
 
+static inline uint32_t
+ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute)
+{
+   return compute->scratch_size;
+}
+
 #endif /* ILO_STATE_COMPUTE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c
index 2e06b07..aec4fd6 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c
@@ -37,7 +37,9 @@ enum vertex_stage {
 
 struct vertex_ff {
    uint8_t grf_start;
-   uint8_t scratch_space;
+
+   uint8_t per_thread_scratch_space;
+   uint32_t per_thread_scratch_size;
 
    uint8_t sampler_count;
    uint8_t surface_count;
@@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
     * others.
     */
    const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 134:
-    *
-    *     "(Per-Thread Scratch Space)
-    *      Range    [0,11] indicating [1K Bytes, 2M Bytes]"
-    */
-   const uint32_t max_scratch_size = 2 * 1024 * 1024;
 
    ILO_DEV_ASSERT(dev, 6, 8);
 
@@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
    assert(!kernel->offset);
 
    assert(kernel->grf_start < max_grf_start);
-   assert(kernel->scratch_size <= max_scratch_size);
 
    return true;
 }
@@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev,
                    const struct ilo_state_shader_kernel_info *kernel,
                    const struct ilo_state_shader_resource_info *resource,
                    const struct ilo_state_shader_urb_info *urb,
+                   uint32_t per_thread_scratch_size,
                    struct vertex_ff *ff)
 {
    ILO_DEV_ASSERT(dev, 6, 8);
 
+   memset(ff, 0, sizeof(*ff));
+
    if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
        !vertex_validate_gen6_urb(dev, stage, urb))
       return false;
 
    ff->grf_start = kernel->grf_start;
-   /* next power of two, starting from 1KB */
-   ff->scratch_space = (kernel->scratch_size > 1024) ?
-      (util_last_bit(kernel->scratch_size - 1) - 10): 0;
+
+   if (per_thread_scratch_size) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 134:
+       *
+       *     "(Per-Thread Scratch Space)
+       *      Range    [0,11] indicating [1K Bytes, 2M Bytes]"
+       */
+      assert(per_thread_scratch_size <= 2 * 1024 * 1024);
+
+      /* next power of two, starting from 1KB */
+      ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
+         (util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
+      ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+   }
 
    ff->sampler_count = (resource->sampler_count <= 12) ?
       (resource->sampler_count + 3) / 4 : 4;
@@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
 
    ILO_DEV_ASSERT(dev, 6, 8);
 
-   if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
-            &info->resource, &info->urb, &ff))
+   if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
+            &info->urb, info->per_thread_scratch_size, &ff))
       return false;
 
    thread_count = vs_get_gen6_thread_count(dev, info);
@@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
 
-   dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw3 = ff.per_thread_scratch_space <<
+      GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
 
    dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
          ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
@@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
    if (ilo_dev_gen(dev) >= ILO_GEN(8))
       vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
 
+   vs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
    return true;
 }
 
@@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
 
    ILO_DEV_ASSERT(dev, 7, 8);
 
-   if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
-            &info->resource, &info->urb, &ff))
+   if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
+            &info->urb, info->per_thread_scratch_size, &ff))
       return false;
 
    thread_count = hs_get_gen7_thread_count(dev, info);
@@ -296,7 +308,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
    if (info->stats_enable)
       dw2 |= GEN7_HS_DW2_STATISTICS;
 
-   dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw4 = ff.per_thread_scratch_space <<
+      GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
 
    dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
          ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
@@ -312,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
    hs->hs[2] = dw4;
    hs->hs[3] = dw5;
 
+   hs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
    return true;
 }
 
@@ -375,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
 
    ILO_DEV_ASSERT(dev, 7, 8);
 
-   if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
-            &info->resource, &info->urb, &ff))
+   if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
+            &info->urb, info->per_thread_scratch_size, &ff))
       return false;
 
    thread_count = ds_get_gen7_thread_count(dev, info);
@@ -387,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
 
-   dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw3 = ff.per_thread_scratch_space <<
+      GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
 
    dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
          ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
@@ -414,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
    if (ilo_dev_gen(dev) >= ILO_GEN(8))
       ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
 
+   ds->scratch_size = ff.per_thread_scratch_size * thread_count;
+
    return true;
 }
 
@@ -427,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev,
 
    ILO_DEV_ASSERT(dev, 6, 8);
 
-   if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
-            &info->resource, &info->urb, ff))
+   if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
+            &info->urb, info->per_thread_scratch_size, ff))
       return false;
 
    /*
@@ -512,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
          ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
 
-   dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw3 = ff.per_thread_scratch_space <<
+      GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
 
    dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
          ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
@@ -552,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
    gs->gs[3] = dw5;
    gs->gs[4] = dw6;
 
+   gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
    return true;
 }
 
@@ -590,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
 
-   dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw3 = ff.per_thread_scratch_space <<
+      GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
 
    dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
          0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
@@ -620,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
    if (ilo_dev_gen(dev) >= ILO_GEN(8))
       gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
 
+   gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
    return true;
 }
 
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h
index 44690c5..3565109 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h
@@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info {
 
    uint8_t grf_start;
    uint8_t pcb_attr_count;
-
-   uint32_t scratch_size;
 };
 
 /**
@@ -77,6 +75,7 @@ struct ilo_state_vs_info {
    struct ilo_state_shader_resource_info resource;
    struct ilo_state_shader_urb_info urb;
 
+   uint32_t per_thread_scratch_size;
    bool dispatch_enable;
    bool stats_enable;
 };
@@ -86,6 +85,7 @@ struct ilo_state_hs_info {
    struct ilo_state_shader_resource_info resource;
    struct ilo_state_shader_urb_info urb;
 
+   uint32_t per_thread_scratch_size;
    bool dispatch_enable;
    bool stats_enable;
 };
@@ -95,6 +95,7 @@ struct ilo_state_ds_info {
    struct ilo_state_shader_resource_info resource;
    struct ilo_state_shader_urb_info urb;
 
+   uint32_t per_thread_scratch_size;
    bool dispatch_enable;
    bool stats_enable;
 };
@@ -119,6 +120,7 @@ struct ilo_state_gs_info {
 
    struct ilo_state_gs_sol_info sol;
 
+   uint32_t per_thread_scratch_size;
    bool dispatch_enable;
    bool stats_enable;
 };
@@ -158,6 +160,8 @@ struct ilo_state_ps_info {
    struct ilo_state_ps_io_info io;
    struct ilo_state_ps_params_info params;
 
+   uint32_t per_thread_scratch_size;
+
    /* bitmask of GEN6_PS_DISPATCH_x */
    uint8_t valid_kernels;
    bool per_sample_dispatch;
@@ -173,23 +177,28 @@ struct ilo_state_ps_info {
 
 struct ilo_state_vs {
    uint32_t vs[5];
+   uint32_t scratch_size;
 };
 
 struct ilo_state_hs {
    uint32_t hs[4];
+   uint32_t scratch_size;
 };
 
 struct ilo_state_ds {
    uint32_t te[3];
    uint32_t ds[5];
+   uint32_t scratch_size;
 };
 
 struct ilo_state_gs {
    uint32_t gs[5];
+   uint32_t scratch_size;
 };
 
 struct ilo_state_ps {
    uint32_t ps[8];
+   uint32_t scratch_size;
 
    struct ilo_state_ps_dispatch_conds {
       bool ps_valid;
@@ -211,6 +220,12 @@ bool
 ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
                            const struct ilo_dev *dev);
 
+static inline uint32_t
+ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs)
+{
+   return vs->scratch_size;
+}
+
 bool
 ilo_state_hs_init(struct ilo_state_hs *hs,
                   const struct ilo_dev *dev,
@@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
                            const struct ilo_dev *dev);
 
 
+static inline uint32_t
+ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs)
+{
+   return hs->scratch_size;
+}
+
 bool
 ilo_state_ds_init(struct ilo_state_ds *ds,
                   const struct ilo_dev *dev,
@@ -230,6 +251,12 @@ bool
 ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
                            const struct ilo_dev *dev);
 
+static inline uint32_t
+ilo_state_ds_get_scratch_size(const struct ilo_state_ds *ds)
+{
+   return ds->scratch_size;
+}
+
 bool
 ilo_state_gs_init(struct ilo_state_gs *gs,
                   const struct ilo_dev *dev,
@@ -239,6 +266,12 @@ bool
 ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
                            const struct ilo_dev *dev);
 
+static inline uint32_t
+ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs)
+{
+   return gs->scratch_size;
+}
+
 bool
 ilo_state_ps_init(struct ilo_state_ps *ps,
                   const struct ilo_dev *dev,
@@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps,
                         const struct ilo_dev *dev,
                         const struct ilo_state_ps_params_info *params);
 
+static inline uint32_t
+ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps)
+{
+   return ps->scratch_size;
+}
+
 #endif /* ILO_STATE_SHADER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
index ceeb68a..5c3ca1e 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
@@ -34,7 +34,8 @@ struct pixel_ff {
    uint32_t kernel_offsets[3];
    uint8_t grf_starts[3];
    bool pcb_enable;
-   uint8_t scratch_space;
+   uint8_t per_thread_scratch_space;
+   uint32_t per_thread_scratch_size;
 
    uint8_t sampler_count;
    uint8_t surface_count;
@@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
 {
    /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
    const uint8_t max_grf_start = 128;
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 271:
-    *
-    *     "(Per-Thread Scratch Space)
-    *      Range  [0,11] indicating [1k bytes, 2M bytes] in powers of two"
-    */
-   const uint32_t max_scratch_size = 2 * 1024 * 1024;
 
    ILO_DEV_ASSERT(dev, 6, 8);
 
@@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
    assert(kernel->offset % 64 == 0);
 
    assert(kernel->grf_start < max_grf_start);
-   assert(kernel->scratch_size <= max_scratch_size);
 
    return true;
 }
@@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
    const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
    const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
    const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
-   uint32_t scratch_size;
 
    ILO_DEV_ASSERT(dev, 6, 8);
 
@@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
                      ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
                       kernel_32->pcb_attr_count));
 
-   scratch_size = 0;
-   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
-       scratch_size < kernel_8->scratch_size)
-      scratch_size = kernel_8->scratch_size;
-   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
-       scratch_size < kernel_16->scratch_size)
-      scratch_size = kernel_16->scratch_size;
-   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
-       scratch_size < kernel_32->scratch_size)
-      scratch_size = kernel_32->scratch_size;
-
-   /* next power of two, starting from 1KB */
-   ff->scratch_space = (scratch_size > 1024) ?
-      (util_last_bit(scratch_size - 1) - 10): 0;
-
    /* GPU hangs on Haswell if none of the dispatch mode bits is set */
    if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
       ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
@@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev,
    if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
       return false;
 
+   if (info->per_thread_scratch_size) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 271:
+       *
+       *     "(Per-Thread Scratch Space)
+       *      Range  [0,11] indicating [1k bytes, 2M bytes] in powers of two"
+       */
+      assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
+
+      /* next power of two, starting from 1KB */
+      ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
+         (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+      ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+   }
+
    ff->sampler_count = (resource->sampler_count <= 12) ?
       (resource->sampler_count + 3) / 4 : 4;
    ff->surface_count = resource->surface_count;
@@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
    if (false)
       dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
 
-   dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw3 = ff->per_thread_scratch_space <<
+      GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
 
    dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
          ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
@@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
    if (false)
       dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
 
-   dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw3 = ff->per_thread_scratch_space <<
+      GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
 
    dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
          ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
@@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
    if (false)
       dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
 
-   dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw4 = ff->per_thread_scratch_space <<
+      GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
 
    dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
          io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
@@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps,
       ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
    }
 
+   ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
    /* save conditions */
    ps->conds = ff.conds;
 
diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c
index c1f759f..c81514f 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen6.c
@@ -476,9 +476,9 @@ gen6_draw_vs(struct ilo_render *r,
 
       if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
           ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
-         gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
+         gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset, NULL);
       else
-         gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+         gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
    }
 }
 
@@ -501,7 +501,7 @@ gen6_draw_gs(struct ilo_render *r,
          cso = ilo_shader_get_kernel_cso(vec->gs);
          kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
 
-         gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
+         gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset, NULL);
       } else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
             ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
          const int verts_per_prim =
@@ -524,9 +524,9 @@ gen6_draw_gs(struct ilo_render *r,
          kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
             ilo_shader_get_kernel_param(vec->vs, param);
 
-         gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
+         gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset, NULL);
       } else {
-         gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
+         gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
       }
    }
 }
@@ -672,7 +672,7 @@ gen6_draw_wm(struct ilo_render *r,
          gen6_wa_pre_3dstate_wm_max_threads(r);
 
       gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
-            &cso->ps, kernel_offset);
+            &cso->ps, kernel_offset, NULL);
    }
 }
 
@@ -817,10 +817,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
    gen6_wa_post_3dstate_constant_vs(r);
 
    gen6_wa_pre_3dstate_vs_toggle(r);
-   gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+   gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
 
    gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
-   gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
+   gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
 
    gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
    gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
@@ -833,7 +833,7 @@ gen6_rectlist_wm(struct ilo_render *r,
    gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen6_wa_pre_3dstate_wm_max_threads(r);
-   gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
+   gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL);
 }
 
 static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 6623a8b..97d9d05 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -319,9 +319,9 @@ gen7_draw_vs(struct ilo_render *r,
       const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
 
       if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
-         gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+         gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
       else
-         gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+         gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
    }
 }
 
@@ -338,9 +338,9 @@ gen7_draw_hs(struct ilo_render *r,
       gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
 
       if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
-         gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
+         gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
       else
-         gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
+         gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@@ -373,9 +373,9 @@ gen7_draw_ds(struct ilo_render *r,
       gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
 
       if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
-         gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
+         gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
       else
-         gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
+         gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@@ -397,9 +397,9 @@ gen7_draw_gs(struct ilo_render *r,
       gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
 
       if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
-         gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
+         gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
       else
-         gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
+         gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@@ -534,7 +534,7 @@ gen7_draw_wm(struct ilo_render *r,
       if (r->hw_ctx_changed)
          gen7_wa_pre_3dstate_ps_max_threads(r);
 
-      gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+      gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
    }
 
    /* 3DSTATE_SCISSOR_STATE_POINTERS */
@@ -678,18 +678,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
                        const struct ilo_blitter *blitter)
 {
    gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
-   gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+   gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
 
    gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
-   gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
+   gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL);
 
    gen7_3DSTATE_TE(r->builder, &blitter->ds);
 
    gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
-   gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
+   gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL);
 
    gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
-   gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
+   gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
 
    gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
 
@@ -711,7 +711,7 @@ gen7_rectlist_wm(struct ilo_render *r,
    gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen7_wa_pre_3dstate_ps_max_threads(r);
-   gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
+   gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL);
 }
 
 static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c
index 65494b4..1f750a2 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen8.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen8.c
@@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r,
 
    /* 3DSTATE_PS */
    if (DIRTY(FS) || r->instruction_bo_changed)
-      gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+      gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
 
    /* 3DSTATE_PS_EXTRA */
    if (DIRTY(FS))
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 73b625e..c78d0e0 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -578,7 +578,6 @@ init_shader_kernel(const struct ilo_shader *kernel,
    kern->grf_start = kernel->in.start_grf;
    kern->pcb_attr_count =
       (kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
-   kern->scratch_size = 0;
 }
 
 static void
@@ -602,6 +601,7 @@ init_vs(struct ilo_shader *kernel,
    init_shader_urb(kernel, state, &info.urb);
    init_shader_kernel(kernel, state, &info.kernel);
    init_shader_resource(kernel, state, &info.resource);
+   info.per_thread_scratch_size = 0;
    info.dispatch_enable = true;
    info.stats_enable = true;
 
@@ -640,6 +640,7 @@ init_gs(struct ilo_shader *kernel,
    init_shader_urb(kernel, state, &info.urb);
    init_shader_kernel(kernel, state, &info.kernel);
    init_shader_resource(kernel, state, &info.resource);
+   info.per_thread_scratch_size = 0;
    info.dispatch_enable = true;
    info.stats_enable = true;
 
@@ -664,6 +665,7 @@ init_ps(struct ilo_shader *kernel,
    init_shader_kernel(kernel, state, &info.kernel_8);
    init_shader_resource(kernel, state, &info.resource);
 
+   info.per_thread_scratch_size = 0;
    info.io.has_rt_write = true;
    info.io.posoffset = GEN6_POSOFFSET_NONE;
    info.io.attr_count = kernel->in.count;




More information about the mesa-commit mailing list