Mesa (master): ilo: add ilo_state_compute

Mon Jun 22 04:57:06 UTC 2015

Module: Mesa
Branch: master
Commit: 57bdcae9e0fbf639014cd375543a8dd356406ac0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=57bdcae9e0fbf639014cd375543a8dd356406ac0

Author: Chia-I Wu <olvaffe at gmail.com>
Date:   Sat Jun 20 23:27:08 2015 +0800

ilo: add ilo_state_compute

Replace gen6_idrt_data with ilo_state_compute, which has a bunch of
validations and is now preferred.

---

 src/gallium/drivers/ilo/Makefile.sources         |    2 +
 src/gallium/drivers/ilo/core/ilo_builder_media.h |  106 ++----
 src/gallium/drivers/ilo/core/ilo_state_compute.c |  435 ++++++++++++++++++++++
 src/gallium/drivers/ilo/core/ilo_state_compute.h |   92 +++++
 src/gallium/drivers/ilo/ilo_render_dynamic.c     |   36 +-
 src/gallium/drivers/ilo/ilo_render_gen.h         |    3 +
 src/gallium/drivers/ilo/ilo_render_media.c       |    3 +-
 src/gallium/drivers/ilo/ilo_state.h              |    1 +
 8 files changed, 586 insertions(+), 92 deletions(-)

diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index e5a0950..95b6b7a 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -21,6 +21,8 @@ C_SOURCES := \
 	core/ilo_image.h \
 	core/ilo_state_cc.c \
 	core/ilo_state_cc.h \
+	core/ilo_state_compute.c \
+	core/ilo_state_compute.h \
 	core/ilo_state_raster.c \
 	core/ilo_state_raster.h \
 	core/ilo_state_sampler.c \
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_media.h b/src/gallium/drivers/ilo/core/ilo_builder_media.h
index 7fbe6d4..7197104 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_media.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_media.h
@@ -29,57 +29,30 @@
 #define ILO_BUILDER_MEDIA_H
 
 #include "genhw/genhw.h"
-#include "../ilo_shader.h"
 #include "intel_winsys.h"
 
 #include "ilo_core.h"
 #include "ilo_dev.h"
+#include "ilo_state_compute.h"
 #include "ilo_builder.h"
 
-struct gen6_idrt_data {
-   const struct ilo_shader_state *cs;
-
-   uint32_t sampler_offset;
-   uint32_t binding_table_offset;
-
-   unsigned curbe_size;
-   unsigned thread_group_size;
-};
-
 static inline void
 gen6_MEDIA_VFE_STATE(struct ilo_builder *builder,
-                     unsigned curbe_alloc, bool use_slm)
+                     const struct ilo_state_compute *compute)
 {
    const uint8_t cmd_len = 8;
-   const unsigned idrt_alloc =
-      ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
-   int max_threads;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
-   max_threads = builder->dev->thread_count;
-
-   curbe_alloc = align(curbe_alloc, 32);
-   assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1));
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2);
-   dw[1] = 0; /* scratch */
-
-   dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
-           0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
-           GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
-           GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
-      dw[2] |= GEN7_VFE_DW2_GPGPU_MODE;
-
+   /* see compute_set_gen6_MEDIA_VFE_STATE() */
+   dw[1] = compute->vfe[0];
+   dw[2] = compute->vfe[1];
    dw[3] = 0;
-
-   dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
-           (curbe_alloc / 32);
-
+   dw[4] = compute->vfe[2];
    dw[5] = 0;
    dw[6] = 0;
    dw[7] = 0;
@@ -194,8 +167,10 @@ gen7_GPGPU_WALKER(struct ilo_builder *builder,
 
 static inline uint32_t
 gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
-                               const struct gen6_idrt_data *data,
-                               int idrt_count)
+                               const struct ilo_state_compute *compute,
+                               const uint32_t *kernel_offsets,
+                               const uint32_t *sampler_offsets,
+                               const uint32_t *binding_table_offsets)
 {
    /*
     * From the Sandy Bridge PRM, volume 2 part 2, page 34:
@@ -211,61 +186,26 @@ gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
     *      aligned address of the Interface Descriptor data."
     */
    const int state_align = 32;
-   const int state_len = (32 / 4) * idrt_count;
+   const int state_len = (32 / 4) * compute->idrt_count;
    uint32_t state_offset, *dw;
    int i;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
    state_offset = ilo_builder_dynamic_pointer(builder,
          ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw);
 
-   for (i = 0; i < idrt_count; i++) {
-      const struct gen6_idrt_data *idrt = &data[i];
-      const struct ilo_shader_state *cs = idrt->cs;
-      unsigned sampler_count, bt_size, slm_size;
-
-      sampler_count =
-         ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
-      assert(sampler_count <= 16);
-      sampler_count = (sampler_count + 3) / 4;
-
-      bt_size =
-         ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
-      if (bt_size > 31)
-         bt_size = 31;
-
-      slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
-
-      assert(idrt->curbe_size / 32 <= 63);
-
-      dw[0] = ilo_shader_get_kernel_offset(idrt->cs);
+   for (i = 0; i < compute->idrt_count; i++) {
+      /* see compute_set_gen6_INTERFACE_DESCRIPTOR_DATA() */
+      dw[0] = compute->idrt[i][0] + kernel_offsets[i];
       dw[1] = 0;
-      dw[2] = idrt->sampler_offset |
-              sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
-      dw[3] = idrt->binding_table_offset |
-              bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
-
-      dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
-              0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
-
-      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
-         dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
-
-         if (slm_size) {
-            assert(slm_size <= 64 * 1024);
-            slm_size = util_next_power_of_two((slm_size + 4095) / 4096);
-
-            dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE |
-                     slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT |
-                     idrt->thread_group_size <<
-                        GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
-         }
-      } else {
-         dw[5] = 0;
-      }
-
-      dw[6] = 0;
+      dw[2] = compute->idrt[i][1] |
+              sampler_offsets[i];
+      dw[3] = compute->idrt[i][2] |
+              binding_table_offsets[i];
+      dw[4] = compute->idrt[i][3];
+      dw[5] = compute->idrt[i][4];
+      dw[6] = compute->idrt[i][5];
       dw[7] = 0;
 
       dw += 8;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c
new file mode 100644
index 0000000..a5fe5e1
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c
@@ -0,0 +1,435 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv at lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_compute.h"
+
+/*
+ * URB partitioning used when programming MEDIA_VFE_STATE.  All members are
+ * filled in by compute_get_gen6_urb_configuration().
+ */
+struct compute_urb_configuration {
+   /* number of entries reserved for the interface descriptors */
+   int idrt_entry_count;
+   /* number of entries needed to hold the CURBE allocation */
+   int curbe_entry_count;
+
+   /* value programmed as the number of URB entries */
+   int urb_entry_count;
+   /* in 256-bit register increments */
+   int urb_entry_size;
+};
+
+/*
+ * Return the number of ROB entries of the device.
+ */
+static int
+get_gen6_rob_entry_count(const struct ilo_dev *dev)
+{
+   int rob_entries;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 60:
+    *
+    *     "ROB has 64KB of storage; 2048 entries."
+    *
+    * From the valid ranges of "CURBE Allocation Size", we can also conclude
+    * that interface entries and CURBE data must be in ROB.  And that ROB
+    * should be 16KB, or 512 entries, on Gen7 GT1.
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) || dev->gt == 2)
+      rob_entries = 2048;
+   else if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      rob_entries = 512;
+   else
+      rob_entries = 1024;
+
+   return rob_entries;
+}
+
+/*
+ * Return the number of URB entries reserved for the interface descriptors.
+ */
+static int
+get_gen6_idrt_entry_count(const struct ilo_dev *dev)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Haswell PRM, volume 7, page 836:
+    *
+    *     "The first 64 URB entries are reserved for the interface
+    *      description..."
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+      return 64;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 21:
+    *
+    *     "The first 32 URB entries are reserved for the interface
+    *      descriptor..."
+    */
+   return 32;
+}
+
+/*
+ * Return how many 256-bit entries are needed to hold curbe_size bytes of
+ * CURBE data, rounding up.
+ */
+static int
+get_gen6_curbe_entry_count(const struct ilo_dev *dev, uint32_t curbe_size)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 21:
+    *
+    *     "(CURBE Allocation Size) Specifies the total length allocated for
+    *      CURBE, in 256-bit register increments."
+    */
+   const int curbe_entries = (curbe_size + 31) / 32;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* CURBE may use whatever the interface descriptors leave of the ROB */
+   assert(curbe_entries <= get_gen6_rob_entry_count(dev) -
+                           get_gen6_idrt_entry_count(dev));
+
+   return curbe_entries;
+}
+
+/*
+ * Fill urb with the URB partitioning for info.  Always returns true;
+ * configurations that do not fit are caught by assertions.
+ */
+static bool
+compute_get_gen6_urb_configuration(const struct ilo_dev *dev,
+                                   const struct ilo_state_compute_info *info,
+                                   struct compute_urb_configuration *urb)
+{
+   struct compute_urb_configuration cfg;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   cfg.idrt_entry_count = get_gen6_idrt_entry_count(dev);
+   cfg.curbe_entry_count =
+      get_gen6_curbe_entry_count(dev, info->curbe_alloc_size);
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 451:
+    *
+    *     "Please note that 0 is not allowed for this field (Number of URB
+    *      Entries)."
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      cfg.urb_entry_count = 1;
+   else
+      cfg.urb_entry_count = 0;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 52:
+    *
+    *     "(URB Entry Allocation Size) Specifies the length of each URB entry
+    *      used by the unit, in 256-bit register increments - 1."
+    */
+   cfg.urb_entry_size = 1;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 22:
+    *
+    *     "MEDIA_VFE_STATE specifies the amount of CURBE space, the URB handle
+    *      size and the number of URB handles. The driver must ensure that
+    *      ((URB_handle_size * URB_num_handle) - CURBE - 32) <=
+    *      URB_allocation_in_L3."
+    */
+   assert(cfg.idrt_entry_count + cfg.curbe_entry_count +
+         cfg.urb_entry_count * cfg.urb_entry_size <=
+         info->cv_urb_alloc_size / 32);
+
+   *urb = cfg;
+
+   return true;
+}
+
+/*
+ * Return the end, in 256-bit registers, of the CURBE region read by the
+ * interface, or 0 when the interface reads no CURBE data at all.
+ */
+static int
+compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
+                                    const struct ilo_state_compute_interface_info *interface)
+{
+   const int cross_thread_regs =
+      (interface->cross_thread_curbe_read_length + 31) / 32;
+   const int per_thread_regs = (interface->curbe_read_length + 31) / 32;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(interface->curbe_read_offset % 32 == 0);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 60:
+    *
+    *     "(Constant URB Entry Read Length) [0,63]"
+    */
+   assert(per_thread_regs <= 63);
+
+   /*
+    * From the Haswell PRM, volume 2d, page 199:
+    *
+    *     "(Cross-Thread Constant Data Read Length) [0,127]"
+    *
+    * Cross-thread reads must be zero before Gen7.5.
+    */
+   assert(cross_thread_regs <=
+         ((ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 127 : 0));
+
+   if (!per_thread_regs && !cross_thread_regs)
+      return 0;
+
+   /* every thread of the group reads its own per-thread range */
+   return interface->curbe_read_offset / 32 + cross_thread_regs +
+      per_thread_regs * interface->thread_group_size;
+}
+
+/*
+ * Check info against the URB configuration in urb.  Always returns true;
+ * violations are caught by assertions.
+ */
+static bool
+compute_validate_gen6(const struct ilo_dev *dev,
+                      const struct ilo_state_compute_info *info,
+                      const struct compute_urb_configuration *urb)
+{
+   int required_curbe_entries = 0;
+   int idx;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* every interface needs a reserved descriptor entry */
+   assert(info->interface_count <= urb->idrt_entry_count);
+
+   /* the CURBE allocation must cover the furthest read of any interface */
+   for (idx = 0; idx < info->interface_count; idx++) {
+      const int read_end =
+         compute_interface_get_gen6_read_end(dev, &info->interfaces[idx]);
+
+      if (read_end > required_curbe_entries)
+         required_curbe_entries = read_end;
+   }
+
+   assert(required_curbe_entries <= urb->curbe_entry_count);
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 452:
+    *
+    *     "CURBE Allocation Size should be 0 for GPGPU workloads that uses
+    *      indirect instead of CURBE."
+    */
+   assert(required_curbe_entries || !urb->curbe_entry_count);
+
+   return true;
+}
+
+/*
+ * Return the encoded per-thread scratch space that is large enough for the
+ * interface with the biggest scratch_size.
+ */
+static uint8_t
+compute_get_gen6_scratch_space(const struct ilo_dev *dev,
+                               const struct ilo_state_compute_info *info)
+{
+   uint32_t max_scratch = 0;
+   uint8_t idx;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (idx = 0; idx < info->interface_count; idx++) {
+      const uint32_t size = info->interfaces[idx].scratch_size;
+
+      if (size > max_scratch)
+         max_scratch = size;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      assert(max_scratch <= 2 * 1024 * 1024);
+
+      /* next power of two, starting from 1KB */
+      if (max_scratch <= 1024)
+         return 0;
+
+      return util_last_bit(max_scratch - 1) - 10;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      assert(max_scratch <= 2 * 1024 * 1024);
+
+      /* next power of two, starting from 2KB */
+      if (max_scratch <= 2048)
+         return 0;
+
+      return util_last_bit(max_scratch - 1) - 11;
+   }
+
+   assert(max_scratch <= 12 * 1024);
+
+   /* number of 1KB blocks, rounded up, minus one */
+   if (max_scratch <= 1024)
+      return 0;
+
+   return (max_scratch - 1) / 1024;
+}
+
+/*
+ * Precompute the dwords of MEDIA_VFE_STATE that depend on info and store
+ * them in compute->vfe: vfe[0] is DW1, vfe[1] is DW2, and vfe[2] is DW4.
+ *
+ * Returns false when getting or validating the URB configuration fails.
+ */
+static bool
+compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_compute_info *info)
+{
+   struct compute_urb_configuration urb;
+   uint8_t scratch_space;
+
+   uint32_t dw1, dw2, dw4;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!compute_get_gen6_urb_configuration(dev, info, &urb) ||
+       !compute_validate_gen6(dev, info, &urb))
+      return false;
+
+   scratch_space = compute_get_gen6_scratch_space(dev, info);
+
+   dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
+         urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
+         GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
+         GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
+
+   /* GPGPU mode is selected here only on Gen7 and Gen7.5 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
+      dw2 |= GEN7_VFE_DW2_GPGPU_MODE;
+
+   /* the entry size is programmed minus one, so it must not be zero */
+   assert(urb.urb_entry_size);
+
+   dw4 = (urb.urb_entry_size - 1) << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
+         urb.curbe_entry_count << GEN6_VFE_DW4_CURBE_SIZE__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(compute->vfe) >= 3);
+   compute->vfe[0] = dw1;
+   compute->vfe[1] = dw2;
+   compute->vfe[2] = dw4;
+
+   return true;
+}
+
+/*
+ * Return the sampler count of the interface in groups of four samplers,
+ * rounded up and clamped to 4.
+ */
+static uint8_t
+compute_interface_get_gen6_sampler_count(const struct ilo_dev *dev,
+                                         const struct ilo_state_compute_interface_info *interface)
+{
+   const unsigned group_count = (interface->sampler_count + 3) / 4;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return (group_count < 4) ? group_count : 4;
+}
+
+/*
+ * Return the surface count of the interface, clamped to 31.
+ */
+static uint8_t
+compute_interface_get_gen6_surface_count(const struct ilo_dev *dev,
+                                         const struct ilo_state_compute_interface_info *interface)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (interface->surface_count > 31)
+      return 31;
+
+   return interface->surface_count;
+}
+
+/*
+ * Return the shared local memory size of the interface in 4KB blocks,
+ * rounded up to a power of two, or 0 when the interface uses no SLM.
+ */
+static uint8_t
+compute_interface_get_gen7_slm_size(const struct ilo_dev *dev,
+                                    const struct ilo_state_compute_interface_info *interface)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 61:
+    *
+    *     "The amount is specified in 4k blocks, but only powers of 2 are
+    *      allowed: 0, 4k, 8k, 16k, 32k and 64k per half-slice."
+    */
+   assert(interface->slm_size <= 64 * 1024);
+
+   /*
+    * Zero is a valid encoding and must stay zero: the caller enables the
+    * barrier whenever the returned size is non-zero.  Do not depend on how
+    * util_next_power_of_two() rounds an input of 0.
+    */
+   if (!interface->slm_size)
+      return 0;
+
+   return util_next_power_of_two((interface->slm_size + 4095) / 4096);
+}
+
+/*
+ * Precompute, for every interface in info, the INTERFACE_DESCRIPTOR_DATA
+ * dwords that can be derived from the interface description alone.
+ * compute->idrt[i] receives DW0, DW2, DW3, DW4, DW5, and DW6, in that order.
+ *
+ * Always returns true; invalid interfaces are caught by assertions.
+ */
+static bool
+compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_state_compute *compute,
+                                           const struct ilo_dev *dev,
+                                           const struct ilo_state_compute_info *info)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (i = 0; i < info->interface_count; i++) {
+      const struct ilo_state_compute_interface_info *interface =
+         &info->interfaces[i];
+      uint16_t read_offset, per_thread_read_len, cross_thread_read_len;
+      uint8_t sampler_count, surface_count;
+      uint32_t dw0, dw2, dw3, dw4, dw5, dw6;
+
+      assert(interface->kernel_offset % 64 == 0);
+      assert(interface->thread_group_size);
+
+      /* CURBE offset and lengths are programmed in units of 32 */
+      read_offset = interface->curbe_read_offset / 32;
+      per_thread_read_len = (interface->curbe_read_length + 31) / 32;
+      cross_thread_read_len =
+         (interface->cross_thread_curbe_read_length + 31) / 32;
+
+      sampler_count =
+         compute_interface_get_gen6_sampler_count(dev, interface);
+      surface_count =
+         compute_interface_get_gen6_surface_count(dev, interface);
+
+      dw0 = interface->kernel_offset;
+      dw2 = sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
+      dw3 = surface_count << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
+      dw4 = per_thread_read_len << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
+            read_offset << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
+
+      /* DW5 and DW6 stay zero before Gen7 */
+      dw5 = 0;
+      dw6 = 0;
+      if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+         const uint8_t slm_size =
+            compute_interface_get_gen7_slm_size(dev, interface);
+
+         dw5 |= GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
+
+         /* the barrier is enabled only together with SLM */
+         if (slm_size) {
+            dw5 |= GEN7_IDRT_DW5_BARRIER_ENABLE |
+                   slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT;
+         }
+
+         /*
+          * From the Haswell PRM, volume 2d, page 199:
+          *
+          *     "(Number of Threads in GPGPU Thread Group) Specifies the
+          *      number of threads that are in this thread group.  Used to
+          *      program the barrier for the number of messages to expect. The
+          *      minimum value is 0 (which will disable the barrier), while
+          *      the maximum value is the number of threads in a subslice for
+          *      local barriers."
+          *
+          * From the Broadwell PRM, volume 2d, page 183:
+          *
+          *     "(Number of Threads in GPGPU Thread Group) Specifies the
+          *      number of threads that are in this thread group.  The minimum
+          *      value is 1, while the maximum value is the number of threads
+          *      in a subslice for local barriers. See vol1b Configurations
+          *      for the number of threads per subslice for different
+          *      products.  The maximum value for global barriers is limited
+          *      by the number of threads in the system, or by 511, whichever
+          *      is lower. This field should not be set to 0 even if the
+          *      barrier is disabled, since an accurate value is needed for
+          *      proper pre-emption."
+          */
+         if (slm_size || ilo_dev_gen(dev) >= ILO_GEN(8)) {
+            dw5 |= interface->thread_group_size <<
+               GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
+         }
+
+         if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+            dw6 |= cross_thread_read_len <<
+               GEN75_IDRT_DW6_CROSS_THREAD_CURBE_READ_LEN__SHIFT;
+         }
+      }
+
+      STATIC_ASSERT(ARRAY_SIZE(compute->idrt[i]) >= 6);
+      compute->idrt[i][0] = dw0;
+      compute->idrt[i][1] = dw2;
+      compute->idrt[i][2] = dw3;
+      compute->idrt[i][3] = dw4;
+      compute->idrt[i][4] = dw5;
+      compute->idrt[i][5] = dw6;
+   }
+
+   return true;
+}
+
+/*
+ * Initialize compute from info.
+ *
+ * Both compute and info->data must be zeroed on entry.  info->data becomes
+ * the storage behind compute->idrt and must hold at least
+ * ilo_state_compute_data_size(dev, info->interface_count) bytes.
+ *
+ * Returns true on success.
+ */
+bool
+ilo_state_compute_init(struct ilo_state_compute *compute,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_compute_info *info)
+{
+   bool ret = true;
+
+   assert(ilo_is_zeroed(compute, sizeof(*compute)));
+   assert(ilo_is_zeroed(info->data, info->data_size));
+
+   assert(ilo_state_compute_data_size(dev, info->interface_count) <=
+         info->data_size);
+   compute->idrt = (uint32_t (*)[6]) info->data;
+
+   /*
+    * gen6_INTERFACE_DESCRIPTOR_DATA() sizes and walks the descriptor table
+    * with idrt_count; left at zero, no descriptor would be emitted.
+    */
+   compute->idrt_count = info->interface_count;
+
+   ret &= compute_set_gen6_MEDIA_VFE_STATE(compute, dev, info);
+   ret &= compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(compute, dev, info);
+
+   assert(ret);
+
+   return ret;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h
new file mode 100644
index 0000000..346f7b6
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h
@@ -0,0 +1,92 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv at lunarg.com>
+ */
+
+#ifndef ILO_STATE_COMPUTE_H
+#define ILO_STATE_COMPUTE_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Haswell PRM, volume 7, page 836:
+ *
+ *     "The first 64 URB entries are reserved for the interface
+ *      description..."
+ */
+#define ILO_STATE_COMPUTE_MAX_INTERFACE_COUNT 64
+
+/* Parameters of one interface descriptor. */
+struct ilo_state_compute_interface_info {
+   /* usually 0 unless there are multiple interfaces */
+   /* must be a multiple of 64 */
+   uint32_t kernel_offset;
+
+   /* the largest value among all interfaces selects the scratch space */
+   uint32_t scratch_size;
+
+   /* encoded in groups of four; values above 12 are clamped */
+   uint8_t sampler_count;
+   /* values above 31 are clamped to 31 */
+   uint8_t surface_count;
+
+   /* must be non-zero */
+   uint16_t thread_group_size;
+   /* at most 64KB; encoded in 4KB blocks */
+   uint32_t slm_size;
+
+   /* must be a multiple of 32 */
+   uint16_t curbe_read_offset;
+   /* the lengths below are rounded up to multiples of 32 */
+   uint16_t curbe_read_length;
+   /* must be 0 before Gen7.5 */
+   uint16_t cross_thread_curbe_read_length;
+};
+
+/* Input of ilo_state_compute_init(). */
+struct ilo_state_compute_info {
+   /*
+    * Zeroed storage for the per-interface dwords; it must hold at least
+    * ilo_state_compute_data_size() bytes.
+    */
+   void *data;
+   size_t data_size;
+
+   /* array of interface_count interface descriptions */
+   const struct ilo_state_compute_interface_info *interfaces;
+   uint8_t interface_count;
+
+   /*
+    * URB size against which the entries in use are checked, after division
+    * by 32.  NOTE(review): presumably the URB space available to compute;
+    * the caller in ilo_render_dynamic.c passes dev->urb_size.
+    */
+   uint32_t cv_urb_alloc_size;
+   /* size of the CURBE allocation, rounded up to a multiple of 32 */
+   uint32_t curbe_alloc_size;
+};
+
+/* Precomputed hardware dwords consumed by the builder in ilo_builder_media.h. */
+struct ilo_state_compute {
+   /* DW1, DW2, and DW4 of MEDIA_VFE_STATE */
+   uint32_t vfe[3];
+
+   /*
+    * One row per interface, holding DW0, DW2, DW3, DW4, DW5, and DW6 of
+    * INTERFACE_DESCRIPTOR_DATA.  Points into ilo_state_compute_info::data.
+    */
+   uint32_t (*idrt)[6];
+   /* number of rows gen6_INTERFACE_DESCRIPTOR_DATA() emits */
+   uint8_t idrt_count;
+};
+
+/*
+ * Return the number of bytes ilo_state_compute_info::data must provide for
+ * interface_count interfaces.  dev is not consulted.
+ */
+static inline size_t
+ilo_state_compute_data_size(const struct ilo_dev *dev,
+                            uint8_t interface_count)
+{
+   /* never dereferenced; sizeof only needs the type of one idrt row */
+   const struct ilo_state_compute *compute = NULL;
+   const size_t row_size = sizeof(*compute->idrt);
+
+   return row_size * interface_count;
+}
+
+bool
+ilo_state_compute_init(struct ilo_state_compute *compute,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_compute_info *info);
+
+#endif /* ILO_STATE_COMPUTE_H */
diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c
index 5618920..3b4c802 100644
--- a/src/gallium/drivers/ilo/ilo_render_dynamic.c
+++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c
@@ -30,6 +30,7 @@
 
 #include "ilo_common.h"
 #include "ilo_blitter.h"
+#include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
 
@@ -522,20 +523,39 @@ gen6_emit_launch_grid_dynamic_idrt(struct ilo_render *r,
                                    struct ilo_render_launch_grid_session *session)
 {
    const struct ilo_shader_state *cs = vec->cs;
-   struct gen6_idrt_data data;
+   struct ilo_state_compute_interface_info interface;
+   struct ilo_state_compute_info info;
+   uint32_t kernel_offset;
 
    ILO_DEV_ASSERT(r->dev, 7, 7.5);
 
-   memset(&data, 0, sizeof(data));
+   memset(&interface, 0, sizeof(interface));
 
-   data.cs = cs;
-   data.sampler_offset = r->state.cs.SAMPLER_STATE;
-   data.binding_table_offset = r->state.cs.BINDING_TABLE_STATE;
+   interface.sampler_count =
+      ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
+   interface.surface_count =
+      ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
+   interface.thread_group_size = session->thread_group_size;
+   interface.slm_size =
+      ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
+   interface.curbe_read_length = r->state.cs.PUSH_CONSTANT_BUFFER_size;
 
-   data.curbe_size = r->state.cs.PUSH_CONSTANT_BUFFER_size;
-   data.thread_group_size = session->thread_group_size;
+   memset(&info, 0, sizeof(info));
+   info.data = session->compute_data;
+   info.data_size = sizeof(session->compute_data);
+   info.interfaces = &interface;
+   info.interface_count = 1;
+   info.cv_urb_alloc_size = r->dev->urb_size;
+   info.curbe_alloc_size = r->state.cs.PUSH_CONSTANT_BUFFER_size;
+
+   ilo_state_compute_init(&session->compute, r->dev, &info);
+
+   kernel_offset = ilo_shader_get_kernel_offset(cs);
+
+   session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder,
+         &session->compute, &kernel_offset,
+         &r->state.cs.SAMPLER_STATE, &r->state.cs.BINDING_TABLE_STATE);
 
-   session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder, &data, 1);
    session->idrt_size = 32;
 }
 
diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h
index 00c8113..aae4ef2 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen.h
+++ b/src/gallium/drivers/ilo/ilo_render_gen.h
@@ -185,6 +185,9 @@ struct ilo_render_launch_grid_session {
 
    uint32_t idrt;
    int idrt_size;
+
+   uint32_t compute_data[6];
+   struct ilo_state_compute compute;
 };
 
 int
diff --git a/src/gallium/drivers/ilo/ilo_render_media.c b/src/gallium/drivers/ilo/ilo_render_media.c
index 387920a..a0de002 100644
--- a/src/gallium/drivers/ilo/ilo_render_media.c
+++ b/src/gallium/drivers/ilo/ilo_render_media.c
@@ -30,6 +30,7 @@
 #include "core/ilo_builder_mi.h"
 #include "core/ilo_builder_render.h"
 
+#include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
 
@@ -206,7 +207,7 @@ ilo_render_emit_launch_grid_commands(struct ilo_render *render,
 
    gen6_state_base_address(render->builder, true);
 
-   gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm);
+   gen6_MEDIA_VFE_STATE(render->builder, &session->compute);
 
    if (pcb_size)
       gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size);
diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h
index e4746d0..537e5db 100644
--- a/src/gallium/drivers/ilo/ilo_state.h
+++ b/src/gallium/drivers/ilo/ilo_state.h
@@ -29,6 +29,7 @@
 #define ILO_STATE_H
 
 #include "core/ilo_state_cc.h"
+#include "core/ilo_state_compute.h"
 #include "core/ilo_state_raster.h"
 #include "core/ilo_state_sampler.h"
 #include "core/ilo_state_sbe.h"