[Mesa-dev] [PATCH v2 3/3] i965: enable INTEL_blackhole_render

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Apr 6 14:31:41 UTC 2018


v2: condition the extension on context isolation support from the
    kernel (Chris)

v3: (Lionel)

    The initial version of this change used a feature of the Gen7+
    command parser to turn the primitive instructions into no-ops.
    Unfortunately, this doesn't play well with how we're using the
    hardware outside of the user-submitted commands. For example,
    resolves are implicit operations which should not be turned into
    no-ops, as some of the previously submitted commands (before
    blackhole_render is enabled) might not have been disabled.
    Consider this sequence:

       glClear();
       glEnable(GL_BLACKHOLE_RENDER_INTEL);
       glDrawArrays(...);
       glReadPixels(...);
       glDisable(GL_BLACKHOLE_RENDER_INTEL);

    While the clear has been emitted outside of blackhole render, it
    should still be resolved properly in the read pixels. Hence we
    need to be more selective and only disable user-submitted
    commands.

    This v3 manually turns primitives into MI_NOOP if blackhole render
    is enabled. This lets us enable this feature on any platform.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 src/mesa/drivers/dri/i965/brw_compute.c      | 46 +++++++++++---------
 src/mesa/drivers/dri/i965/brw_defines.h      |  8 +++-
 src/mesa/drivers/dri/i965/brw_draw.c         | 20 ++++++---
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 4 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 5ce899bcbcc..a368e5fb2c6 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -131,29 +131,35 @@ brw_emit_gpgpu_walker(struct brw_context *brw)
    if (right_non_aligned != 0)
       right_mask >>= (simd_size - right_non_aligned);
 
+   struct gl_context *ctx = &brw->ctx;
    uint32_t dwords = devinfo->gen < 8 ? 11 : 15;
    BEGIN_BATCH(dwords);
-   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
-   OUT_BATCH(0);
-   if (devinfo->gen >= 8) {
-      OUT_BATCH(0);                     /* Indirect Data Length */
-      OUT_BATCH(0);                     /* Indirect Data Start Address */
+   if (ctx->IntelBlackholeRender) {
+      for (uint32_t d = 0; d < dwords; d++)
+         OUT_BATCH(MI_NOOP);
+   } else {
+      OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
+      OUT_BATCH(0);
+      if (devinfo->gen >= 8) {
+         OUT_BATCH(0);                     /* Indirect Data Length */
+         OUT_BATCH(0);                     /* Indirect Data Start Address */
+      }
+      assert(thread_width_max <= brw->screen->devinfo.max_cs_threads);
+      OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) |
+                SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX));
+      OUT_BATCH(0);                        /* Thread Group ID Starting X */
+      if (devinfo->gen >= 8)
+         OUT_BATCH(0);                     /* MBZ */
+      OUT_BATCH(num_groups[0]);            /* Thread Group ID X Dimension */
+      OUT_BATCH(0);                        /* Thread Group ID Starting Y */
+      if (devinfo->gen >= 8)
+         OUT_BATCH(0);                     /* MBZ */
+      OUT_BATCH(num_groups[1]);            /* Thread Group ID Y Dimension */
+      OUT_BATCH(0);                        /* Thread Group ID Starting/Resume Z */
+      OUT_BATCH(num_groups[2]);            /* Thread Group ID Z Dimension */
+      OUT_BATCH(right_mask);               /* Right Execution Mask */
+      OUT_BATCH(0xffffffff);               /* Bottom Execution Mask */
    }
-   assert(thread_width_max <= brw->screen->devinfo.max_cs_threads);
-   OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) |
-             SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX));
-   OUT_BATCH(0);                        /* Thread Group ID Starting X */
-   if (devinfo->gen >= 8)
-      OUT_BATCH(0);                     /* MBZ */
-   OUT_BATCH(num_groups[0]);            /* Thread Group ID X Dimension */
-   OUT_BATCH(0);                        /* Thread Group ID Starting Y */
-   if (devinfo->gen >= 8)
-      OUT_BATCH(0);                     /* MBZ */
-   OUT_BATCH(num_groups[1]);            /* Thread Group ID Y Dimension */
-   OUT_BATCH(0);                        /* Thread Group ID Starting/Resume Z */
-   OUT_BATCH(num_groups[2]);            /* Thread Group ID Z Dimension */
-   OUT_BATCH(right_mask);               /* Right Execution Mask */
-   OUT_BATCH(0xffffffff);               /* Bottom Execution Mask */
    ADVANCE_BATCH();
 
    BEGIN_BATCH(2);
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 8bf6f68b67c..c8a597c8ad0 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN10_CACHE_MODE_SS            0x0e420
 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 
-#define INSTPM                             0x20c0
+#define INSTPM                             0x20c0 /* Gen6-8 */
 # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
+# define INSTPM_GLOBAL_DEBUG_ENABLE                    (1 << 4)
+# define INSTPM_MEDIA_INSTRUCTION_DISABLE              (1 << 3)
+# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 2)
+# define INSTPM_3D_STATE_INSTRUCTION_DISABLE           (1 << 1)
 
 #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
 # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
+# define CSDBG2_MEDIA_INSTRUCTION_DISABLE              (1 << 1)
+# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 0)
 
 #define SLICE_COMMON_ECO_CHICKEN1          0x731c /* Gen9+ */
 # define GLK_SCEC_BARRIER_MODE_GPGPU       (0 << 7)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 4caaadd560d..9d44f2b6026 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -250,6 +250,7 @@ brw_emit_prim(struct brw_context *brw,
       indirect_flag = 0;
    }
 
+   struct gl_context *ctx = &brw->ctx;
    BEGIN_BATCH(devinfo->gen >= 7 ? 7 : 6);
 
    if (devinfo->gen >= 7) {
@@ -257,12 +258,21 @@ brw_emit_prim(struct brw_context *brw,
          (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
          ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;
 
-      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
-      OUT_BATCH(hw_prim | vertex_access_type);
+      if (ctx->IntelBlackholeRender) {
+         OUT_BATCH(MI_NOOP);
+         OUT_BATCH(MI_NOOP);
+      } else {
+         OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
+         OUT_BATCH(hw_prim | vertex_access_type);
+      }
    } else {
-      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
-                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
-                vertex_access_type);
+      if (ctx->IntelBlackholeRender) {
+         OUT_BATCH(MI_NOOP);
+      } else {
+         OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
+                   hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+                   vertex_access_type);
+      }
    }
    OUT_BATCH(verts_per_instance);
    OUT_BATCH(start_vertex_location);
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 73a6c73f537..171dc05fe24 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -120,6 +120,7 @@ intelInitExtensions(struct gl_context *ctx)
    ctx->Extensions.APPLE_object_purgeable = true;
    ctx->Extensions.ATI_separate_stencil = true;
    ctx->Extensions.ATI_texture_env_combine3 = true;
+   ctx->Extensions.INTEL_blackhole_render = true;
    ctx->Extensions.MESA_pack_invert = true;
    ctx->Extensions.NV_conditional_render = true;
    ctx->Extensions.NV_primitive_restart = true;
-- 
2.17.0



More information about the mesa-dev mailing list