[Mesa-dev] [PATCH 54/70] i965: Enable GPU snooping of CPU caches for select buffers

Chris Wilson chris at chris-wilson.co.uk
Fri Aug 7 13:13:58 PDT 2015


On LLC, all buffers are normally cache coherent between the CPU and the
GPU, giving both parties fast access to shared data.

However, older architectures and Atoms do not implement LLC between the
CPU and GPU. Instead they utilise a snooping architecture where the GPU
can snoop the CPU cache when told. The snooping has much higher overhead
(i.e. slower) than regular memory fetches so its use should be reserved
to instances where the data likely resides in the CPU cache and the GPU
need only use it once (e.g. for streaming textures from the CPU). The
other major benefit is that it can also push data to the CPU in a cache
coherent fashion, and so CPU access to a snooped buffer is very fast,
making it preferable any time we need to read back data from the GPU.

For demonstration we set the snoop flag for reading back via a blit from
a miptree, and for reading back transform feedback.
---
 src/mesa/drivers/dri/i965/brw_batch.c         | 31 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_batch.h         |  9 ++++++++
 src/mesa/drivers/dri/i965/gen6_sol.c          |  1 +
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  1 +
 4 files changed, 42 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index f7a060f..099da72 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -1363,6 +1363,37 @@ struct brw_bo *brw_bo_create(struct brw_batch *batch,
    return bo;
 }
 
+static bool __brw_bo_set_caching(struct brw_bo *bo, int caching)
+{
+   struct drm_i915_gem_caching arg;
+
+   memset(&arg, 0, sizeof(arg));
+   arg.handle = bo->handle;
+   arg.caching = caching;
+   return drmIoctl(bo->batch->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
+}
+
+void brw_bo_enable_snoop(struct brw_bo *bo)
+{
+   assert(bo->reusable);
+
+   if (bo->cache_coherent)
+      return;
+
+   if (bo->tiling)
+      return; /* XXX abort? */
+
+   if (!__brw_bo_set_caching(bo, I915_CACHING_CACHED))
+      return;
+
+   drm_intel_bo_disable_reuse(bo->base);
+   if (bo->reusable)
+      list_move(&bo->link, &bo->batch->inactive);
+
+   bo->reusable = false;
+   bo->cache_coherent = true;
+}
+
 static uint64_t brw_surface_size(int cpp,
                                  uint32_t width,
                                  uint32_t height,
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 94cb102..1e80000 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -254,6 +254,15 @@ struct brw_bo *brw_bo_create_from_name(struct brw_batch *batch,
                                        const char *name,
                                        uint32_t global_name);
 
+/* Enable CPU cache coherency for the buffer. On LLC, normally all buffers
+ * are cache coherent, but on non-LLC architectures we can tell the GPU
+ * to snoop from and to flush into the CPU cache. Performing the snoop
+ * is slower for the GPU, but eliminates the uncached penalty from the CPU,
+ * so it is only useful for streaming data (read once) to the GPU or when
+ * we need to read anything back from the GPU.
+ */
+void brw_bo_enable_snoop(struct brw_bo *bo);
+
 void brw_bo_mark_dirty(struct brw_batch *batch, brw_bo *bo);
 void brw_batch_clear_dirty(struct brw_batch *batch);
 
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index d75774c..7766cd4 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -209,6 +209,7 @@ brw_new_transform_feedback(struct gl_context *ctx, GLuint name)
    brw_obj->prim_count_bo =
       brw_bo_create(&brw->batch, "xfb primitive counts",
                     4096, 64, BO_ALLOC_FOR_RENDER);
+   brw_bo_enable_snoop(brw_obj->prim_count_bo);
 
    return &brw_obj->base;
 }
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 0410d06..57df6a4 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2140,6 +2140,7 @@ intel_miptree_map_blit(struct brw_context *brw,
       goto fail;
    }
    map->stride = map->mt->pitch;
+   brw_bo_enable_snoop(map->mt->bo);
 
    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
-- 
2.5.0



More information about the mesa-dev mailing list