[Mesa-dev] [PATCH v4 7/7] i965: enable INTEL_blackhole_render

Lionel Landwerlin lionel.g.landwerlin at intel.com
Wed Jun 20 17:25:34 UTC 2018


v2: condition the extension on context isolation support from the
    kernel (Chris)

v3: (Lionel)

    The initial version of this change used a feature of the Gen7+
    command parser to turn the primitive instructions into no-ops.
    Unfortunately this doesn't play well with how we're using the
    hardware outside of the user submitted commands. For example
    resolves are implicit operations which should not be turned into
    no-ops as part of the previously submitted commands (before
    blackhole_render is enabled) might not be disabled. For example
    this sequence :

       glClear();
       glEnable(GL_BLACKHOLE_RENDER_INTEL);
       glDrawArrays(...);
       glReadPixels(...);
       glDisable(GL_BLACKHOLE_RENDER_INTEL);

    While clear has been emitted outside the blackhole render, it
    should still be resolved properly in the read pixels. Hence we
    need to be more selective and only disable user submitted
    commands.

    This v3 manually turns primitives into MI_NOOP if blackhole render
    is enabled. This lets us enable this feature on any platform.

v4: Limit support to gen7.5+ (Lionel)

v5: Enable Gen7.5 support again, requires a kernel update of the
    command parser (Lionel)

v6: Disable Gen7.5 again... Kernel devs want these patches landed
    before they accept the kernel patches to whitelist INSTPM (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 src/mesa/drivers/dri/i965/brw_clear.c         |  3 +
 src/mesa/drivers/dri/i965/brw_context.h       |  2 +
 src/mesa/drivers/dri/i965/brw_defines.h       |  8 ++-
 src/mesa/drivers/dri/i965/brw_misc_state.c    | 56 +++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_state.h         |  4 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c  |  2 +
 src/mesa/drivers/dri/i965/genX_state_upload.c |  4 ++
 src/mesa/drivers/dri/i965/intel_extensions.c  |  8 +++
 src/mesa/drivers/dri/i965/intel_fbo.c         |  6 ++
 src/mesa/drivers/dri/i965/intel_pixel_read.c  |  3 +
 src/mesa/drivers/dri/i965/intel_tex_copy.c    |  3 +
 src/mesa/drivers/dri/i965/intel_tex_image.c   |  5 ++
 12 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index b097dfe346c..d3e360b3e23 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -247,6 +247,9 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
    if (!_mesa_check_conditional_render(ctx))
       return;
 
+   if (ctx->IntelBlackholeRender)
+      return;
+
    if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
       brw->front_buffer_dirty = true;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0880d18b6f0..23602df2138 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -218,6 +218,7 @@ enum brw_state_id {
    BRW_STATE_CONSERVATIVE_RASTERIZATION,
    BRW_STATE_DRAW_CALL,
    BRW_STATE_AUX,
+   BRW_STATE_CS_NOOP,
    BRW_NUM_STATE_BITS
 };
 
@@ -309,6 +310,7 @@ enum brw_state_id {
 #define BRW_NEW_CONSERVATIVE_RASTERIZATION (1ull << BRW_STATE_CONSERVATIVE_RASTERIZATION)
 #define BRW_NEW_DRAW_CALL               (1ull << BRW_STATE_DRAW_CALL)
 #define BRW_NEW_AUX_STATE               (1ull << BRW_STATE_AUX)
+#define BRW_NEW_CS_NOOP                 (1ull << BRW_STATE_CS_NOOP)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 320426d6944..4e2d6acc706 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1651,11 +1651,17 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN10_CACHE_MODE_SS            0x0e420
 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 
-#define INSTPM                             0x20c0
+#define INSTPM                             0x20c0 /* Gen6-8 */
 # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
+# define INSTPM_GLOBAL_DEBUG_ENABLE                    (1 << 4)
+# define INSTPM_MEDIA_INSTRUCTION_DISABLE              (1 << 3)
+# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 2)
+# define INSTPM_3D_STATE_INSTRUCTION_DISABLE           (1 << 1)
 
 #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
 # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
+# define CSDBG2_MEDIA_INSTRUCTION_DISABLE              (1 << 1)
+# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 0)
 
 #define GEN7_RPSTAT1                       0xA01C
 #define  GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT   7
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 9a663b1d61c..baf64757b93 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -811,3 +811,59 @@ brw_upload_state_base_address(struct brw_context *brw)
    brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
    brw->batch.state_base_address_emitted = true;
 }
+
+void
+brw_set_cs_noop(struct brw_context *brw, bool enable_noop)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   uint32_t reg_offset = devinfo->gen >= 9 ? CS_DEBUG_MODE2 : INSTPM;
+   uint32_t reg_mask = devinfo->gen >= 9 ?
+      REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE |
+               CSDBG2_MEDIA_INSTRUCTION_DISABLE) :
+      REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+               INSTPM_MEDIA_INSTRUCTION_DISABLE);
+   uint32_t reg_value = 0;
+
+   if (enable_noop) {
+      reg_value = devinfo->gen >= 9 ?
+         (CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE |
+          CSDBG2_MEDIA_INSTRUCTION_DISABLE) :
+         (INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+          INSTPM_MEDIA_INSTRUCTION_DISABLE);
+   }
+
+   if (devinfo->gen >= 8) {
+      brw_emit_pipe_control_write(brw,
+                                  PIPE_CONTROL_LRI_WRITE_IMMEDIATE |
+                                  PIPE_CONTROL_CS_STALL,
+                                  NULL, reg_offset, reg_mask | reg_value);
+   } else {
+      brw_load_register_imm32_force_posted(brw,
+                                           reg_offset,
+                                           reg_mask | reg_value);
+   }
+}
+
+void
+brw_hold_cs_noop(struct brw_context *brw)
+{
+   if (!brw->ctx.IntelBlackholeRender)
+      return;
+
+   brw_set_cs_noop(brw, false);
+   brw->ctx.NewDriverState |= brw->ctx.DriverFlags.NewIntelBlackholeRender;
+}
+
+static void
+brw_emit_cs_noop(struct brw_context *brw)
+{
+   brw_set_cs_noop(brw, brw->ctx.IntelBlackholeRender);
+}
+
+const struct brw_tracked_state brw_cs_noop = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CS_NOOP,
+   },
+   .emit = brw_emit_cs_noop,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 1b4745ef753..4e10f35e157 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -94,6 +94,7 @@ extern const struct brw_tracked_state gen7_push_constant_space;
 extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen8_pma_fix;
 extern const struct brw_tracked_state brw_cs_work_groups_surface;
+extern const struct brw_tracked_state brw_cs_noop;
 
 static inline bool
 brw_state_dirty(const struct brw_context *brw,
@@ -116,6 +117,9 @@ brw_depthbuffer_format(struct brw_context *brw);
 
 void brw_upload_state_base_address(struct brw_context *brw);
 
+void brw_set_cs_noop(struct brw_context *brw, bool enable_noop);
+void brw_hold_cs_noop(struct brw_context *brw);
+
 /* gen8_depth_state.c */
 void gen8_write_pma_stall_bits(struct brw_context *brw,
                                uint32_t pma_stall_bits);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index d8273aa5734..7c90249c6c4 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -234,6 +234,7 @@ void brw_init_state( struct brw_context *brw )
    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
    ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
    ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
+   ctx->DriverFlags.NewIntelBlackholeRender = BRW_NEW_CS_NOOP;
 }
 
 
@@ -369,6 +370,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
    DEFINE_BIT(BRW_NEW_DRAW_CALL),
    DEFINE_BIT(BRW_NEW_AUX_STATE),
+   DEFINE_BIT(BRW_NEW_CS_NOOP),
    {0, 0, 0}
 };
 
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 88fde9d12fd..3f444967bfa 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -5770,6 +5770,7 @@ genX(init_atoms)(struct brw_context *brw)
 
       &genX(cut_index),
       &gen8_pma_fix,
+      &brw_cs_noop,
    };
 #endif
 
@@ -5789,6 +5790,9 @@ genX(init_atoms)(struct brw_context *brw)
       &brw_cs_work_groups_surface,
       &genX(cs_samplers),
       &genX(cs_state),
+#if GEN_GEN >= 8
+      &brw_cs_noop,
+#endif
    };
 
    STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms));
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 5a9369d7b43..88d24bc2538 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -292,6 +292,14 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.OES_texture_view = true;
    }
 
+   if (brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION &&
+       devinfo->gen >= 8) {
+      /* We'll be able to enable Haswell when the kernel command parser allows
+       * INSTPM.
+       */
+      ctx->Extensions.INTEL_blackhole_render = true;
+   }
+
    if (devinfo->gen >= 8) {
       ctx->Extensions.ARB_gpu_shader_int64 = devinfo->has_64bit_types;
       /* requires ARB_gpu_shader_int64 */
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index fb84b738c08..dfcb626c38e 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -50,6 +50,7 @@
 #include "intel_tex.h"
 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_state.h"
 
 #define FILE_DEBUG_FLAG DEBUG_FBO
 
@@ -123,6 +124,8 @@ intel_map_renderbuffer(struct gl_context *ctx,
       return;
    }
 
+   brw_hold_cs_noop(brw);
+
    intel_prepare_render(brw);
 
    /* The MapRenderbuffer API should always return a single-sampled mapping.
@@ -881,6 +884,9 @@ intel_blit_framebuffer(struct gl_context *ctx,
    if (!_mesa_check_conditional_render(ctx))
       return;
 
+   if (ctx->IntelBlackholeRender)
+      return;
+
    if (devinfo->gen < 6) {
       /* On gen4-5, try BLT first.
        *
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 6ed7895bc76..865132ada8a 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -37,6 +37,7 @@
 
 #include "brw_context.h"
 #include "brw_blorp.h"
+#include "brw_state.h"
 #include "intel_screen.h"
 #include "intel_batchbuffer.h"
 #include "intel_buffers.h"
@@ -272,6 +273,8 @@ intelReadPixels(struct gl_context * ctx,
    intel_prepare_render(brw);
    brw->front_buffer_dirty = dirty;
 
+   brw_hold_cs_noop(brw);
+
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       if (intel_readpixels_blorp(ctx, x, y, width, height,
                                  format, type, pixels, pack))
diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
index bc209c605bd..1032cd659e4 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -38,6 +38,7 @@
 #include "intel_fbo.h"
 #include "intel_tex.h"
 #include "brw_context.h"
+#include "brw_state.h"
 
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
@@ -52,6 +53,8 @@ intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
 {
    struct brw_context *brw = brw_context(ctx);
 
+   brw_hold_cs_noop(brw);
+
    /* Try BLORP first.  It can handle almost everything. */
    if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
                                  xoffset, yoffset, width, height))
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index fae179214dd..027eca7e121 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -26,6 +26,7 @@
 #include "intel_tiled_memcpy.h"
 #include "brw_context.h"
 #include "brw_blorp.h"
+#include "brw_state.h"
 
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
@@ -328,6 +329,8 @@ intel_upload_tex(struct gl_context * ctx,
    if (mt && mt->format == MESA_FORMAT_S_UINT8)
       mt->r8stencil_needs_update = true;
 
+   brw_hold_cs_noop(brw);
+
    if (_mesa_is_bufferobj(packing->BufferObj) || tex_busy ||
        mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
       ok = intel_texsubimage_blorp(brw, dims, texImage,
@@ -822,6 +825,8 @@ intel_get_tex_sub_image(struct gl_context *ctx,
 
    DBG("%s\n", __func__);
 
+   brw_hold_cs_noop(brw);
+
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       if (intel_gettexsubimage_blorp(brw, texImage,
                                      xoffset, yoffset, zoffset,
-- 
2.17.1



More information about the mesa-dev mailing list