[Libva] [PATCH] memman: implement PIPE_CONTROL workaround.
Xiang, Haihao
haihao.xiang at intel.com
Wed Jul 11 19:21:50 PDT 2012
On Wed, 2012-07-11 at 09:16 +0200, Gwenole Beauchesne wrote:
> From: Daniel Vetter <daniel.vetter at ffwll.ch>
>
> Sandybridge requires an elaborate dance to flush caches without
> hanging the gpu. See public docs Vol2Part1 1.7.4.1 PIPE_CONTROL
> or the corresponding code in mesa/kernel.
>
> v2: Incorporate review from Chris Wilson. For paranoia keep all three
> PIPE_CONTROL cmds in the same batchbuffer to avoid upsetting the gpu.
>
> Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
> [Gwenole: merged from xf86-video-intel]
> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
> ---
> src/intel_batchbuffer.c | 54 +++++++++++++++++++++++++++++++++++-----------
> src/intel_driver.h | 3 ++
> src/intel_memman.c | 6 +++++
> 3 files changed, 50 insertions(+), 13 deletions(-)
>
> diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
> index 0b52281..77a2c90 100644
> --- a/src/intel_batchbuffer.c
> +++ b/src/intel_batchbuffer.c
> @@ -154,6 +154,36 @@ intel_batchbuffer_data(struct intel_batchbuffer *batch,
> batch->ptr += size;
> }
>
> +static void
> +intel_batchbuffer_emit_post_sync_nonzero_flush(struct intel_batchbuffer *batch)
> +{
> + struct intel_driver_data * const intel = batch->intel;
> +
> + /* Keep this entire sequence of 3 PIPE_CONTROL cmds in one batch to
> + avoid upsetting the gpu. */
> + BEGIN_BATCH(batch, 3*4);
> + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
> + OUT_BATCH(batch, (CMD_PIPE_CONTROL_CS_STALL |
> + CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD));
> + OUT_BATCH(batch, 0); /* address */
> + OUT_BATCH(batch, 0); /* write data */
> +
> + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
> + OUT_BATCH(batch, CMD_PIPE_CONTROL_WRITE_QWORD);
> + OUT_RELOC(batch, intel->wa_scratch_bo,
> + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
> + OUT_BATCH(batch, 0); /* write data */
The length (DW0[7:0]) is 3 for a QWORD write. The batch buffer used on
Linux is a non-secure batch buffer, so the address given here must be
a PPGTT address, which is valid only with the Linux 3.4+(?) kernel. In
addition, is this operation needed? The doc only says that some store data
commands are needed for TLB invalidation.
> +
> + /* now finally the _real flush */
> + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
> + OUT_BATCH(batch, (CMD_PIPE_CONTROL_WC_FLUSH |
> + CMD_PIPE_CONTROL_TC_FLUSH |
> + CMD_PIPE_CONTROL_NOWRITE));
> + OUT_BATCH(batch, 0); /* write address */
> + OUT_BATCH(batch, 0); /* write data */
> + ADVANCE_BATCH(batch);
> +}
> +
> void
> intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
> {
> @@ -162,24 +192,22 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
> if (IS_GEN6(intel->device_id) ||
> IS_GEN7(intel->device_id)) {
> if (batch->flag == I915_EXEC_RENDER) {
> - BEGIN_BATCH(batch, 4);
> - OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2);
> -
> - if (IS_GEN6(intel->device_id))
> - OUT_BATCH(batch,
> - CMD_PIPE_CONTROL_WC_FLUSH |
> - CMD_PIPE_CONTROL_TC_FLUSH |
> - CMD_PIPE_CONTROL_NOWRITE);
> - else
> + if (IS_GEN6(intel->device_id)) {
> + /* HW workaround for Sandy Bridge */
> + intel_batchbuffer_emit_post_sync_nonzero_flush(batch);
> + }
> + else {
> + BEGIN_BATCH(batch, 4);
> + OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2);
> OUT_BATCH(batch,
> CMD_PIPE_CONTROL_WC_FLUSH |
> CMD_PIPE_CONTROL_TC_FLUSH |
> CMD_PIPE_CONTROL_DC_FLUSH |
> CMD_PIPE_CONTROL_NOWRITE);
> -
> - OUT_BATCH(batch, 0);
> - OUT_BATCH(batch, 0);
> - ADVANCE_BATCH(batch);
> + OUT_BATCH(batch, 0);
> + OUT_BATCH(batch, 0);
> + ADVANCE_BATCH(batch);
> + }
> } else {
> if (batch->flag == I915_EXEC_BLT) {
> BEGIN_BLT_BATCH(batch, 4);
> diff --git a/src/intel_driver.h b/src/intel_driver.h
> index b383218..ad95e41 100644
> --- a/src/intel_driver.h
> +++ b/src/intel_driver.h
> @@ -42,6 +42,7 @@
> #define BR13_8888 (0x3 << 24)
>
> #define CMD_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16))
> +#define CMD_PIPE_CONTROL_CS_STALL (1 << 20)
> #define CMD_PIPE_CONTROL_NOWRITE (0 << 14)
> #define CMD_PIPE_CONTROL_WRITE_QWORD (1 << 14)
> #define CMD_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
> @@ -54,6 +55,7 @@
> #define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5)
> #define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
> #define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
> +#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
> #define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
>
>
> @@ -116,6 +118,7 @@ struct intel_driver_data
> int locked;
>
> dri_bufmgr *bufmgr;
> + dri_bo *wa_scratch_bo;
>
> unsigned int has_exec2 : 1; /* Flag: has execbuffer2? */
> unsigned int has_bsd : 1; /* Flag: has bitstream decoder for H.264? */
> diff --git a/src/intel_memman.c b/src/intel_memman.c
> index 7d56e96..cde267e 100644
> --- a/src/intel_memman.c
> +++ b/src/intel_memman.c
> @@ -38,12 +38,18 @@ intel_memman_init(struct intel_driver_data *intel)
> assert(intel->bufmgr);
> intel_bufmgr_gem_enable_reuse(intel->bufmgr);
>
> + if (IS_GEN6(intel->device_id)) {
> + intel->wa_scratch_bo =
> + drm_intel_bo_alloc(intel->bufmgr, "wa scratch", 4096, 4096);
> + assert(intel->wa_scratch_bo);
> + }
> return True;
> }
>
> Bool
> intel_memman_terminate(struct intel_driver_data *intel)
> {
> + drm_intel_bo_unreference(intel->wa_scratch_bo);
> drm_intel_bufmgr_destroy(intel->bufmgr);
> return True;
> }
More information about the Libva
mailing list