[Mesa-dev] [PATCH v4 7/7] i965: enable INTEL_blackhole_render

Daniel Vetter daniel at ffwll.ch
Tue Jul 10 14:04:47 UTC 2018


On Wed, Jun 20, 2018 at 06:25:34PM +0100, Lionel Landwerlin wrote:
> v2: condition the extension on context isolation support from the
>     kernel (Chris)
> 
> v3: (Lionel)
> 
>     The initial version of this change used a feature of the Gen7+
>     command parser to turn the primitive instructions into no-ops.
>     Unfortunately this doesn't play well with how we're using the
>     hardware outside of the user submitted commands. For example
>     resolves are implicit operations which should not be turned into
>     no-ops as part of the previously submitted commands (before
>     blackhole_render is enabled) might not be disabled. For example
>     this sequence :
> 
>        glClear();
>        glEnable(GL_BLACKHOLE_RENDER_INTEL);
>        glDrawArrays(...);
>        glReadPixels(...);
>        glDisable(GL_BLACKHOLE_RENDER_INTEL);
> 
>     While clear has been emitted outside the blackhole render, it
>     should still be resolved properly in the read pixels. Hence we
>     need to be more selective and only disable user submitted
>     commands.
> 
>     This v3 manually turns primitives into MI_NOOP if blackhole render
>     is enabled. This lets us enable this feature on any platform.
> 
> v4: Limit support to gen7.5+ (Lionel)
> 
> v5: Enable Gen7.5 support again, requires a kernel update of the
>     command parser (Lionel)
> 
> v6: Disable Gen7.5 again... Kernel devs want these patches landed
>     before they accept the kernel patches to whitelist INSTPM (Lionel)

Hm, this doesn't quite read how kernel patches are usually handled:
Ordering sequence is:
1. get everything reviewed and tested (both kernel and userspace), but do
not yet start merging
2. merge kernel (if you feel paranoid, wait until Dave Airlie accepted it
into drm-next)
3. merge userspace

Insisting that the userspace stuff lands before the kernel (even if it's
just prep work) is kinda the wrong way round, and needlessly complicates
the process.

This is all documented in full details in

https://dri.freedesktop.org/docs/drm/gpu/drm-uapi.html#open-source-userspace-requirements

Cheers, Daniel

> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_clear.c         |  3 +
>  src/mesa/drivers/dri/i965/brw_context.h       |  2 +
>  src/mesa/drivers/dri/i965/brw_defines.h       |  8 ++-
>  src/mesa/drivers/dri/i965/brw_misc_state.c    | 56 +++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_state.h         |  4 ++
>  src/mesa/drivers/dri/i965/brw_state_upload.c  |  2 +
>  src/mesa/drivers/dri/i965/genX_state_upload.c |  4 ++
>  src/mesa/drivers/dri/i965/intel_extensions.c  |  8 +++
>  src/mesa/drivers/dri/i965/intel_fbo.c         |  6 ++
>  src/mesa/drivers/dri/i965/intel_pixel_read.c  |  3 +
>  src/mesa/drivers/dri/i965/intel_tex_copy.c    |  3 +
>  src/mesa/drivers/dri/i965/intel_tex_image.c   |  5 ++
>  12 files changed, 103 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
> index b097dfe346c..d3e360b3e23 100644
> --- a/src/mesa/drivers/dri/i965/brw_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_clear.c
> @@ -247,6 +247,9 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
>     if (!_mesa_check_conditional_render(ctx))
>        return;
>  
> +   if (ctx->IntelBlackholeRender)
> +      return;
> +
>     if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
>        brw->front_buffer_dirty = true;
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 0880d18b6f0..23602df2138 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -218,6 +218,7 @@ enum brw_state_id {
>     BRW_STATE_CONSERVATIVE_RASTERIZATION,
>     BRW_STATE_DRAW_CALL,
>     BRW_STATE_AUX,
> +   BRW_STATE_CS_NOOP,
>     BRW_NUM_STATE_BITS
>  };
>  
> @@ -309,6 +310,7 @@ enum brw_state_id {
>  #define BRW_NEW_CONSERVATIVE_RASTERIZATION (1ull << BRW_STATE_CONSERVATIVE_RASTERIZATION)
>  #define BRW_NEW_DRAW_CALL               (1ull << BRW_STATE_DRAW_CALL)
>  #define BRW_NEW_AUX_STATE               (1ull << BRW_STATE_AUX)
> +#define BRW_NEW_CS_NOOP                 (1ull << BRW_STATE_CS_NOOP)
>  
>  struct brw_state_flags {
>     /** State update flags signalled by mesa internals */
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 320426d6944..4e2d6acc706 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1651,11 +1651,17 @@ enum brw_pixel_shader_coverage_mask_mode {
>  #define GEN10_CACHE_MODE_SS            0x0e420
>  #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
>  
> -#define INSTPM                             0x20c0
> +#define INSTPM                             0x20c0 /* Gen6-8 */
>  # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
> +# define INSTPM_GLOBAL_DEBUG_ENABLE                    (1 << 4)
> +# define INSTPM_MEDIA_INSTRUCTION_DISABLE              (1 << 3)
> +# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 2)
> +# define INSTPM_3D_STATE_INSTRUCTION_DISABLE           (1 << 1)
>  
>  #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
>  # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
> +# define CSDBG2_MEDIA_INSTRUCTION_DISABLE              (1 << 1)
> +# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 0)
>  
>  #define GEN7_RPSTAT1                       0xA01C
>  #define  GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT   7
> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
> index 9a663b1d61c..baf64757b93 100644
> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> @@ -811,3 +811,59 @@ brw_upload_state_base_address(struct brw_context *brw)
>     brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
>     brw->batch.state_base_address_emitted = true;
>  }
> +
> +void
> +brw_set_cs_noop(struct brw_context *brw, bool enable_noop)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   uint32_t reg_offset = devinfo->gen >= 9 ? CS_DEBUG_MODE2 : INSTPM;
> +   uint32_t reg_mask = devinfo->gen >= 9 ?
> +      REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE |
> +               CSDBG2_MEDIA_INSTRUCTION_DISABLE) :
> +      REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
> +               INSTPM_MEDIA_INSTRUCTION_DISABLE);
> +   uint32_t reg_value = 0;
> +
> +   if (enable_noop) {
> +      reg_value = devinfo->gen >= 9 ?
> +         (CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE |
> +          CSDBG2_MEDIA_INSTRUCTION_DISABLE) :
> +         (INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
> +          INSTPM_MEDIA_INSTRUCTION_DISABLE);
> +   }
> +
> +   if (devinfo->gen >= 8) {
> +      brw_emit_pipe_control_write(brw,
> +                                  PIPE_CONTROL_LRI_WRITE_IMMEDIATE |
> +                                  PIPE_CONTROL_CS_STALL,
> +                                  NULL, reg_offset, reg_mask | reg_value);
> +   } else {
> +      brw_load_register_imm32_force_posted(brw,
> +                                           reg_offset,
> +                                           reg_mask | reg_value);
> +   }
> +}
> +
> +void
> +brw_hold_cs_noop(struct brw_context *brw)
> +{
> +   if (!brw->ctx.IntelBlackholeRender)
> +      return;
> +
> +   brw_set_cs_noop(brw, false);
> +   brw->ctx.NewDriverState |= brw->ctx.DriverFlags.NewIntelBlackholeRender;
> +}
> +
> +static void
> +brw_emit_cs_noop(struct brw_context *brw)
> +{
> +   brw_set_cs_noop(brw, brw->ctx.IntelBlackholeRender);
> +}
> +
> +const struct brw_tracked_state brw_cs_noop = {
> +   .dirty = {
> +      .mesa = 0,
> +      .brw = BRW_NEW_CS_NOOP,
> +   },
> +   .emit = brw_emit_cs_noop,
> +};
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index 1b4745ef753..4e10f35e157 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -94,6 +94,7 @@ extern const struct brw_tracked_state gen7_push_constant_space;
>  extern const struct brw_tracked_state gen7_urb;
>  extern const struct brw_tracked_state gen8_pma_fix;
>  extern const struct brw_tracked_state brw_cs_work_groups_surface;
> +extern const struct brw_tracked_state brw_cs_noop;
>  
>  static inline bool
>  brw_state_dirty(const struct brw_context *brw,
> @@ -116,6 +117,9 @@ brw_depthbuffer_format(struct brw_context *brw);
>  
>  void brw_upload_state_base_address(struct brw_context *brw);
>  
> +void brw_set_cs_noop(struct brw_context *brw, bool enable_noop);
> +void brw_hold_cs_noop(struct brw_context *brw);
> +
>  /* gen8_depth_state.c */
>  void gen8_write_pma_stall_bits(struct brw_context *brw,
>                                 uint32_t pma_stall_bits);
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index d8273aa5734..7c90249c6c4 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -234,6 +234,7 @@ void brw_init_state( struct brw_context *brw )
>     ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
>     ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
>     ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
> +   ctx->DriverFlags.NewIntelBlackholeRender = BRW_NEW_CS_NOOP;
>  }
>  
>  
> @@ -369,6 +370,7 @@ static struct dirty_bit_map brw_bits[] = {
>     DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
>     DEFINE_BIT(BRW_NEW_DRAW_CALL),
>     DEFINE_BIT(BRW_NEW_AUX_STATE),
> +   DEFINE_BIT(BRW_NEW_CS_NOOP),
>     {0, 0, 0}
>  };
>  
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 88fde9d12fd..3f444967bfa 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -5770,6 +5770,7 @@ genX(init_atoms)(struct brw_context *brw)
>  
>        &genX(cut_index),
>        &gen8_pma_fix,
> +      &brw_cs_noop,
>     };
>  #endif
>  
> @@ -5789,6 +5790,9 @@ genX(init_atoms)(struct brw_context *brw)
>        &brw_cs_work_groups_surface,
>        &genX(cs_samplers),
>        &genX(cs_state),
> +#if GEN_GEN >= 8
> +      &brw_cs_noop,
> +#endif
>     };
>  
>     STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms));
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 5a9369d7b43..88d24bc2538 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -292,6 +292,14 @@ intelInitExtensions(struct gl_context *ctx)
>        ctx->Extensions.OES_texture_view = true;
>     }
>  
> +   if (brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION &&
> +       devinfo->gen >= 8) {
> +      /* We'll be able to enable Haswell when the kernel command parser allows
> +       * INSTPM.
> +       */
> +      ctx->Extensions.INTEL_blackhole_render = true;
> +   }
> +
>     if (devinfo->gen >= 8) {
>        ctx->Extensions.ARB_gpu_shader_int64 = devinfo->has_64bit_types;
>        /* requires ARB_gpu_shader_int64 */
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
> index fb84b738c08..dfcb626c38e 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -50,6 +50,7 @@
>  #include "intel_tex.h"
>  #include "brw_context.h"
>  #include "brw_defines.h"
> +#include "brw_state.h"
>  
>  #define FILE_DEBUG_FLAG DEBUG_FBO
>  
> @@ -123,6 +124,8 @@ intel_map_renderbuffer(struct gl_context *ctx,
>        return;
>     }
>  
> +   brw_hold_cs_noop(brw);
> +
>     intel_prepare_render(brw);
>  
>     /* The MapRenderbuffer API should always return a single-sampled mapping.
> @@ -881,6 +884,9 @@ intel_blit_framebuffer(struct gl_context *ctx,
>     if (!_mesa_check_conditional_render(ctx))
>        return;
>  
> +   if (ctx->IntelBlackholeRender)
> +      return;
> +
>     if (devinfo->gen < 6) {
>        /* On gen4-5, try BLT first.
>         *
> diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
> index 6ed7895bc76..865132ada8a 100644
> --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
> +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
> @@ -37,6 +37,7 @@
>  
>  #include "brw_context.h"
>  #include "brw_blorp.h"
> +#include "brw_state.h"
>  #include "intel_screen.h"
>  #include "intel_batchbuffer.h"
>  #include "intel_buffers.h"
> @@ -272,6 +273,8 @@ intelReadPixels(struct gl_context * ctx,
>     intel_prepare_render(brw);
>     brw->front_buffer_dirty = dirty;
>  
> +   brw_hold_cs_noop(brw);
> +
>     if (_mesa_is_bufferobj(pack->BufferObj)) {
>        if (intel_readpixels_blorp(ctx, x, y, width, height,
>                                   format, type, pixels, pack))
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
> index bc209c605bd..1032cd659e4 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
> @@ -38,6 +38,7 @@
>  #include "intel_fbo.h"
>  #include "intel_tex.h"
>  #include "brw_context.h"
> +#include "brw_state.h"
>  
>  #define FILE_DEBUG_FLAG DEBUG_TEXTURE
>  
> @@ -52,6 +53,8 @@ intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
>  {
>     struct brw_context *brw = brw_context(ctx);
>  
> +   brw_hold_cs_noop(brw);
> +
>     /* Try BLORP first.  It can handle almost everything. */
>     if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
>                                   xoffset, yoffset, width, height))
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
> index fae179214dd..027eca7e121 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
> @@ -26,6 +26,7 @@
>  #include "intel_tiled_memcpy.h"
>  #include "brw_context.h"
>  #include "brw_blorp.h"
> +#include "brw_state.h"
>  
>  #define FILE_DEBUG_FLAG DEBUG_TEXTURE
>  
> @@ -328,6 +329,8 @@ intel_upload_tex(struct gl_context * ctx,
>     if (mt && mt->format == MESA_FORMAT_S_UINT8)
>        mt->r8stencil_needs_update = true;
>  
> +   brw_hold_cs_noop(brw);
> +
>     if (_mesa_is_bufferobj(packing->BufferObj) || tex_busy ||
>         mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
>        ok = intel_texsubimage_blorp(brw, dims, texImage,
> @@ -822,6 +825,8 @@ intel_get_tex_sub_image(struct gl_context *ctx,
>  
>     DBG("%s\n", __func__);
>  
> +   brw_hold_cs_noop(brw);
> +
>     if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
>        if (intel_gettexsubimage_blorp(brw, texImage,
>                                       xoffset, yoffset, zoffset,
> -- 
> 2.17.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the mesa-dev mailing list