[Libva] [PATCH] memman: implement PIPE_CONTROL workaround.

Gwenole Beauchesne gb.devel at gmail.com
Wed Jul 11 00:16:33 PDT 2012


From: Daniel Vetter <daniel.vetter at ffwll.ch>

Sandybridge requires an elaborate dance to flush caches without
hanging the GPU. See public docs Vol2Part1 1.7.4.1 PIPE_CONTROL
or the corresponding code in mesa/kernel.

v2: Incorporate review from Chris Wilson. Out of paranoia, keep all three
PIPE_CONTROL cmds in the same batchbuffer to avoid upsetting the GPU.

Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
[Gwenole: merged from xf86-video-intel]
Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
---
 src/intel_batchbuffer.c |   54 +++++++++++++++++++++++++++++++++++-----------
 src/intel_driver.h      |    3 ++
 src/intel_memman.c      |    6 +++++
 3 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 0b52281..77a2c90 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -154,6 +154,36 @@ intel_batchbuffer_data(struct intel_batchbuffer *batch,
     batch->ptr += size;
 }
 
+static void
+intel_batchbuffer_emit_post_sync_nonzero_flush(struct intel_batchbuffer *batch)
+{
+    struct intel_driver_data * const intel = batch->intel; 
+
+    /* Keep this entire sequence of 3 PIPE_CONTROL cmds in one batch to
+       avoid upsetting the gpu. */
+    BEGIN_BATCH(batch, 3*4);
+    OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
+    OUT_BATCH(batch, (CMD_PIPE_CONTROL_CS_STALL |
+                      CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD));
+    OUT_BATCH(batch, 0); /* address */
+    OUT_BATCH(batch, 0); /* write data */
+
+    OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
+    OUT_BATCH(batch, CMD_PIPE_CONTROL_WRITE_QWORD);
+    OUT_RELOC(batch, intel->wa_scratch_bo,
+              I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+    OUT_BATCH(batch, 0); /* write data */
+
+    /* now finally the _real flush */
+    OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
+    OUT_BATCH(batch, (CMD_PIPE_CONTROL_WC_FLUSH |
+                      CMD_PIPE_CONTROL_TC_FLUSH |
+                      CMD_PIPE_CONTROL_NOWRITE));
+    OUT_BATCH(batch, 0); /* write address */
+    OUT_BATCH(batch, 0); /* write data */
+    ADVANCE_BATCH(batch);
+}
+
 void
 intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
 {
@@ -162,24 +192,22 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
     if (IS_GEN6(intel->device_id) ||
         IS_GEN7(intel->device_id)) {
         if (batch->flag == I915_EXEC_RENDER) {
-            BEGIN_BATCH(batch, 4);
-            OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2);
-
-            if (IS_GEN6(intel->device_id))
-                OUT_BATCH(batch, 
-                          CMD_PIPE_CONTROL_WC_FLUSH |
-                          CMD_PIPE_CONTROL_TC_FLUSH |
-                          CMD_PIPE_CONTROL_NOWRITE);
-            else
+            if (IS_GEN6(intel->device_id)) {
+                /* HW workaround for Sandy Bridge */
+                intel_batchbuffer_emit_post_sync_nonzero_flush(batch);
+            }
+            else {
+                BEGIN_BATCH(batch, 4);
+                OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2);
                 OUT_BATCH(batch, 
                           CMD_PIPE_CONTROL_WC_FLUSH |
                           CMD_PIPE_CONTROL_TC_FLUSH |
                           CMD_PIPE_CONTROL_DC_FLUSH |
                           CMD_PIPE_CONTROL_NOWRITE);
-
-            OUT_BATCH(batch, 0);
-            OUT_BATCH(batch, 0);
-            ADVANCE_BATCH(batch);
+                OUT_BATCH(batch, 0);
+                OUT_BATCH(batch, 0);
+                ADVANCE_BATCH(batch);
+            }
         } else {
             if (batch->flag == I915_EXEC_BLT) {
                 BEGIN_BLT_BATCH(batch, 4);
diff --git a/src/intel_driver.h b/src/intel_driver.h
index b383218..ad95e41 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -42,6 +42,7 @@
 #define BR13_8888                               (0x3 << 24)
 
 #define CMD_PIPE_CONTROL                        (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16))
+#define CMD_PIPE_CONTROL_CS_STALL               (1 << 20)
 #define CMD_PIPE_CONTROL_NOWRITE                (0 << 14)
 #define CMD_PIPE_CONTROL_WRITE_QWORD            (1 << 14)
 #define CMD_PIPE_CONTROL_WRITE_DEPTH            (2 << 14)
@@ -54,6 +55,7 @@
 #define CMD_PIPE_CONTROL_DC_FLUSH               (1 << 5)
 #define CMD_PIPE_CONTROL_GLOBAL_GTT             (1 << 2)
 #define CMD_PIPE_CONTROL_LOCAL_PGTT             (0 << 2)
+#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD    (1 << 1)
 #define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH      (1 << 0)
 
 
@@ -116,6 +118,7 @@ struct intel_driver_data
     int locked;
 
     dri_bufmgr *bufmgr;
+    dri_bo *wa_scratch_bo;
 
     unsigned int has_exec2  : 1; /* Flag: has execbuffer2? */
     unsigned int has_bsd    : 1; /* Flag: has bitstream decoder for H.264? */
diff --git a/src/intel_memman.c b/src/intel_memman.c
index 7d56e96..cde267e 100644
--- a/src/intel_memman.c
+++ b/src/intel_memman.c
@@ -38,12 +38,18 @@ intel_memman_init(struct intel_driver_data *intel)
     assert(intel->bufmgr);
     intel_bufmgr_gem_enable_reuse(intel->bufmgr);
 
+    if (IS_GEN6(intel->device_id)) {
+        intel->wa_scratch_bo =
+            drm_intel_bo_alloc(intel->bufmgr, "wa scratch", 4096, 4096);
+        assert(intel->wa_scratch_bo);
+    }
     return True;
 }
 
 Bool 
 intel_memman_terminate(struct intel_driver_data *intel)
 {
+    drm_intel_bo_unreference(intel->wa_scratch_bo);
     drm_intel_bufmgr_destroy(intel->bufmgr);
     return True;
 }
-- 
1.7.5.4



More information about the Libva mailing list