[Mesa-dev] [PATCH] intel: Use a CPU map of the batch on LLC-sharing architectures.

Eric Anholt eric at anholt.net
Sun Jan 20 14:59:20 PST 2013


Before, we were keeping a CPU-only buffer to accumulate the batchbuffer in,
which was an improvement over mapping the batch through the GTT directly
(since any readback or other failure to stream through write combining
correctly would hurt).  However, on LLC-sharing architectures we can do better
by mapping the batch directly, which reduces the cache footprint of the
application since we no longer have this extra copy of a batchbuffer around.

Improves performance of GLBenchmark 2.1 offscreen on IVB by 3.5% +/- 0.4%
(n=21).  Improves Lightsmark performance by 1.1% +/- 0.1% (n=76).  Improves
cairo-gl performance by 1.9% +/- 1.4% (n=57).

No statistically significant difference in GLB2.1 on SNB (n=37).  Improves
cairo-gl performance by 2.1% +/- 0.1% (n=278).
---
 src/mesa/drivers/dri/intel/intel_batchbuffer.c |   26 ++++++++++++++++++------
 src/mesa/drivers/dri/intel/intel_batchbuffer.h |    2 +-
 src/mesa/drivers/dri/intel/intel_context.c     |    2 +-
 src/mesa/drivers/dri/intel/intel_context.h     |    3 ++-
 4 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index d36dacc..8c6524e 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -68,6 +68,11 @@ intel_batchbuffer_init(struct intel_context *intel)
 						      "pipe_control workaround",
 						      4096, 4096);
    }
+
+   if (!intel->has_llc) {
+      intel->batch.cpu_map = malloc(intel->maxBatchSize);
+      intel->batch.map = intel->batch.cpu_map;
+   }
 }
 
 static void
@@ -83,6 +88,10 @@ intel_batchbuffer_reset(struct intel_context *intel)
 
    intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
 					intel->maxBatchSize, 4096);
+   if (intel->has_llc) {
+      drm_intel_bo_map(intel->batch.bo, true);
+      intel->batch.map = intel->batch.bo->virtual;
+   }
 
    intel->batch.reserved_space = BATCH_RESERVED;
    intel->batch.state_batch_offset = intel->batch.bo->size;
@@ -114,6 +123,7 @@ intel_batchbuffer_reset_to_saved(struct intel_context *intel)
 void
 intel_batchbuffer_free(struct intel_context *intel)
 {
+   free(intel->batch.cpu_map);
    drm_intel_bo_unreference(intel->batch.last_bo);
    drm_intel_bo_unreference(intel->batch.bo);
    drm_intel_bo_unreference(intel->batch.workaround_bo);
@@ -168,12 +178,16 @@ do_flush_locked(struct intel_context *intel)
    struct intel_batchbuffer *batch = &intel->batch;
    int ret = 0;
 
-   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
-   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
-      ret = drm_intel_bo_subdata(batch->bo,
-				 batch->state_batch_offset,
-				 batch->bo->size - batch->state_batch_offset,
-				 (char *)batch->map + batch->state_batch_offset);
+   if (intel->has_llc) {
+      drm_intel_bo_unmap(batch->bo);
+   } else {
+      ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
+      if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
+	 ret = drm_intel_bo_subdata(batch->bo,
+				    batch->state_batch_offset,
+				    batch->bo->size - batch->state_batch_offset,
+				    (char *)batch->map + batch->state_batch_offset);
+      }
    }
 
    if (!intel->intelScreen->no_hw) {
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index bae6555..39e7d26 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -112,7 +112,7 @@ intel_batchbuffer_require_space(struct intel_context *intel,
    intel->batch.is_blit = is_blit;
 
 #ifdef DEBUG
-   assert(sz < sizeof(intel->batch.map) - BATCH_RESERVED);
+   assert(sz < intel->maxBatchSize - BATCH_RESERVED);
 #endif
    if (intel_batchbuffer_space(intel) < sz)
       intel_batchbuffer_flush(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index ab97d66..9505471 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -708,7 +708,7 @@ intelInitContext(struct intel_context *intel,
    if (intel->gen < 4)
       intel->maxBatchSize = 4096;
    else
-      intel->maxBatchSize = sizeof(intel->batch.map);
+      intel->maxBatchSize = BATCH_SZ;
 
    intel->bufmgr = intelScreen->bufmgr;
 
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 80e4cac..af49ab1 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -129,7 +129,8 @@ struct intel_batchbuffer {
 
    uint16_t emit, total;
    uint16_t used, reserved_space;
-   uint32_t map[8192];
+   uint32_t *map;
+   uint32_t *cpu_map;
 #define BATCH_SZ (8192*sizeof(uint32_t))
 
    uint32_t state_batch_offset;
-- 
1.7.10.4



More information about the mesa-dev mailing list