[Mesa-dev] [PATCH v3 3/3] i965: enable INTEL_blackhole_render

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Apr 27 16:07:13 UTC 2018


v2: condition the extension on context isolation support from the
    kernel (Chris)

v3: (Lionel)

    The initial version of this change used a feature of the Gen7+
    command parser to turn the primitive instructions into no-ops.
    Unfortunately this doesn't play well with how we're using the
    hardware outside of the user submitted commands. For example,
    resolves are implicit operations which should not be turned into
    no-ops, because some of the previously submitted commands (those
    issued before blackhole_render was enabled) were not disabled.
    Consider this sequence:

       glClear();
       glEnable(GL_BLACKHOLE_RENDER_INTEL);
       glDrawArrays(...);
       glReadPixels(...);
       glDisable(GL_BLACKHOLE_RENDER_INTEL);

    While the clear has been emitted outside of blackhole render, it
    should still be resolved properly during the read pixels. Hence
    we need to be more selective and only disable user submitted
    commands.

    This v3 manually turns primitives into MI_NOOP if blackhole render
    is enabled. This lets us enable this feature on any platform.

v4: Bring back CS noop flags from v1/2 (Lionel)
    Limit support to gen8+, see intel_extensions.c (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 src/mesa/drivers/dri/i965/brw_clear.c         |  3 ++
 src/mesa/drivers/dri/i965/brw_context.h       |  2 +
 src/mesa/drivers/dri/i965/brw_defines.h       |  8 +++-
 src/mesa/drivers/dri/i965/brw_misc_state.c    | 48 +++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_state.h         |  4 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c  |  2 +
 src/mesa/drivers/dri/i965/genX_state_upload.c |  4 ++
 src/mesa/drivers/dri/i965/intel_extensions.c  | 13 +++++
 src/mesa/drivers/dri/i965/intel_fbo.c         | 11 +++++
 src/mesa/drivers/dri/i965/intel_pixel_read.c  |  3 ++
 src/mesa/drivers/dri/i965/intel_tex_copy.c    |  3 ++
 src/mesa/drivers/dri/i965/intel_tex_image.c   |  5 ++
 12 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 3d540d6d905..f434b36d629 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -293,6 +293,9 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
    if (!_mesa_check_conditional_render(ctx))
       return;
 
+   if (ctx->IntelBlackholeRender)
+      return;
+
    if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
       brw->front_buffer_dirty = true;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1e6a45eee1f..211a4ef7baa 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -218,6 +218,7 @@ enum brw_state_id {
    BRW_STATE_CONSERVATIVE_RASTERIZATION,
    BRW_STATE_DRAW_CALL,
    BRW_STATE_AUX,
+   BRW_STATE_CS_NOOP,
    BRW_NUM_STATE_BITS
 };
 
@@ -309,6 +310,7 @@ enum brw_state_id {
 #define BRW_NEW_CONSERVATIVE_RASTERIZATION (1ull << BRW_STATE_CONSERVATIVE_RASTERIZATION)
 #define BRW_NEW_DRAW_CALL               (1ull << BRW_STATE_DRAW_CALL)
 #define BRW_NEW_AUX_STATE               (1ull << BRW_STATE_AUX)
+#define BRW_NEW_CS_NOOP                 (1ull << BRW_STATE_CS_NOOP)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 855f1c7d744..83a39406482 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN10_CACHE_MODE_SS            0x0e420
 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 
-#define INSTPM                             0x20c0
+#define INSTPM                             0x20c0 /* Gen6-8 */
 # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
+# define INSTPM_GLOBAL_DEBUG_ENABLE                    (1 << 4)
+# define INSTPM_MEDIA_INSTRUCTION_DISABLE              (1 << 3)
+# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 2)
+# define INSTPM_3D_STATE_INSTRUCTION_DISABLE           (1 << 1)
 
 #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
 # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
+# define CSDBG2_MEDIA_INSTRUCTION_DISABLE              (1 << 1)
+# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 0)
 
 #define GEN7_RPSTAT1                       0xA01C
 #define  GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT   7
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 05517ebf587..6aac6ed70e1 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -764,3 +764,51 @@ brw_upload_state_base_address(struct brw_context *brw)
    brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
    brw->batch.state_base_address_emitted = true;
 }
+
+void
+brw_set_cs_noop(struct brw_context *brw, bool enable_noop)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   uint32_t reg_offset = devinfo->gen >= 9 ? CS_DEBUG_MODE2 : INSTPM;
+   uint32_t reg_mask = devinfo->gen >= 9 ?
+      REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE |
+               CSDBG2_MEDIA_INSTRUCTION_DISABLE) :
+      REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+               INSTPM_MEDIA_INSTRUCTION_DISABLE);
+   uint32_t reg_value = 0;
+
+   if (enable_noop) {
+      reg_value = devinfo->gen >= 9 ?
+         (CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE |
+          CSDBG2_MEDIA_INSTRUCTION_DISABLE) :
+         (INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+          INSTPM_MEDIA_INSTRUCTION_DISABLE);
+   }
+
+   brw_load_register_imm32(brw, reg_offset, reg_mask | reg_value);
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_LRI_WRITE_IMMEDIATE);
+}
+
+void
+brw_hold_cs_noop(struct brw_context *brw)
+{
+   if (!brw->ctx.IntelBlackholeRender)
+      return;
+
+   brw_set_cs_noop(brw, false);
+   brw->ctx.NewDriverState |= brw->ctx.DriverFlags.NewIntelBlackholeRender;
+}
+
+static void
+brw_emit_cs_noop(struct brw_context *brw)
+{
+   brw_set_cs_noop(brw, brw->ctx.IntelBlackholeRender);
+}
+
+const struct brw_tracked_state brw_cs_noop = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CS_NOOP,
+   },
+   .emit = brw_emit_cs_noop,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 9acb6257401..52745a6df8e 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -95,6 +95,7 @@ extern const struct brw_tracked_state gen7_push_constant_space;
 extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen8_pma_fix;
 extern const struct brw_tracked_state brw_cs_work_groups_surface;
+extern const struct brw_tracked_state brw_cs_noop;
 
 static inline bool
 brw_state_dirty(const struct brw_context *brw,
@@ -120,6 +121,9 @@ brw_convert_depth_value(mesa_format format, float value);
 
 void brw_upload_state_base_address(struct brw_context *brw);
 
+void brw_set_cs_noop(struct brw_context *brw, bool enable_noop);
+void brw_hold_cs_noop(struct brw_context *brw);
+
 /* gen8_depth_state.c */
 void gen8_write_pma_stall_bits(struct brw_context *brw,
                                uint32_t pma_stall_bits);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index d8273aa5734..7c90249c6c4 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -234,6 +234,7 @@ void brw_init_state( struct brw_context *brw )
    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
    ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
    ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
+   ctx->DriverFlags.NewIntelBlackholeRender = BRW_NEW_CS_NOOP;
 }
 
 
@@ -369,6 +370,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
    DEFINE_BIT(BRW_NEW_DRAW_CALL),
    DEFINE_BIT(BRW_NEW_AUX_STATE),
+   DEFINE_BIT(BRW_NEW_CS_NOOP),
    {0, 0, 0}
 };
 
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 1a32c60ae34..9e93fb14ae8 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -5712,6 +5712,7 @@ genX(init_atoms)(struct brw_context *brw)
 
       &genX(cut_index),
       &gen8_pma_fix,
+      &brw_cs_noop,
    };
 #endif
 
@@ -5731,6 +5732,9 @@ genX(init_atoms)(struct brw_context *brw)
       &brw_cs_work_groups_surface,
       &genX(cs_samplers),
       &genX(cs_state),
+#if GEN_GEN >= 8
+      &brw_cs_noop,
+#endif
    };
 
    STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms));
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 73a6c73f537..f8808683f0c 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -279,6 +279,19 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.OES_copy_image = true;
    }
 
+   if (brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION &&
+       devinfo->gen >= 8) {
+      /* Even though IVB/HSW support the same INSTPM flags as gen8,
+       * experiments show that it's causing problems with transform feedback
+       * tests.
+       *
+       * An MI_LRI into INSTPM with a data of 0 (which should have no effect
+       * as it is a masked register) makes the tests fail. Similarly emitting
+       * a PIPE_CONTROL with LRI_WRITE_IMMEDIATE makes the same tests fail.
+       */
+      ctx->Extensions.INTEL_blackhole_render = true;
+   }
+
    if (devinfo->gen >= 8) {
       ctx->Extensions.ARB_gpu_shader_int64 = devinfo->has_64bit_types;
       /* requires ARB_gpu_shader_int64 */
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index ca4008f8a03..fcd9679ee6d 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -50,6 +50,7 @@
 #include "intel_tex.h"
 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_state.h"
 
 #define FILE_DEBUG_FLAG DEBUG_FBO
 
@@ -123,6 +124,8 @@ intel_map_renderbuffer(struct gl_context *ctx,
       return;
    }
 
+   brw_hold_cs_noop(brw);
+
    intel_prepare_render(brw);
 
    /* The MapRenderbuffer API should always return a single-sampled mapping.
@@ -915,6 +918,14 @@ intel_blit_framebuffer(struct gl_context *ctx,
       assert(!"Invalid blit");
    }
 
+   /* When BlackholeRender is enabled, BlitFramebuffer should be turned into a
+    * noop. Since we only enable this extension on gen8+ and this is the only
+    * case where we might be using the blitter/software, we need to bail here
+    * if enabled.
+    */
+   if (ctx->IntelBlackholeRender)
+      return;
+
    /* Try using the BLT engine. */
    mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
                                               srcX0, srcY0, srcX1, srcY1,
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index cf957378f92..bd06fca704a 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -37,6 +37,7 @@
 
 #include "brw_context.h"
 #include "brw_blorp.h"
+#include "brw_state.h"
 #include "intel_screen.h"
 #include "intel_batchbuffer.h"
 #include "intel_blit.h"
@@ -273,6 +274,8 @@ intelReadPixels(struct gl_context * ctx,
    intel_prepare_render(brw);
    brw->front_buffer_dirty = dirty;
 
+   brw_hold_cs_noop(brw);
+
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       if (intel_readpixels_blorp(ctx, x, y, width, height,
                                  format, type, pixels, pack))
diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
index 5a0e09f2551..84dc491e01d 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -39,6 +39,7 @@
 #include "intel_tex.h"
 #include "intel_blit.h"
 #include "brw_context.h"
+#include "brw_state.h"
 
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
@@ -102,6 +103,8 @@ intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
 {
    struct brw_context *brw = brw_context(ctx);
 
+   brw_hold_cs_noop(brw);
+
    /* Try BLORP first.  It can handle almost everything. */
    if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
                                  xoffset, yoffset, width, height))
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 3fd227ad05d..8bd8dc493a7 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -27,6 +27,7 @@
 #include "intel_tiled_memcpy.h"
 #include "brw_context.h"
 #include "brw_blorp.h"
+#include "brw_state.h"
 
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
@@ -329,6 +330,8 @@ intel_upload_tex(struct gl_context * ctx,
    if (mt && mt->format == MESA_FORMAT_S_UINT8)
       mt->r8stencil_needs_update = true;
 
+   brw_hold_cs_noop(brw);
+
    if (_mesa_is_bufferobj(packing->BufferObj) || tex_busy ||
        mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
       ok = intel_texsubimage_blorp(brw, dims, texImage,
@@ -823,6 +826,8 @@ intel_get_tex_sub_image(struct gl_context *ctx,
 
    DBG("%s\n", __func__);
 
+   brw_hold_cs_noop(brw);
+
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       if (intel_gettexsubimage_blorp(brw, texImage,
                                      xoffset, yoffset, zoffset,
-- 
2.17.0



More information about the mesa-dev mailing list