[Mesa-dev] [PATCH 2/4] i965: Allow creation of brw_bo from system memory (userptr)

Chris Wilson chris at chris-wilson.co.uk
Thu Aug 16 21:02:10 UTC 2018


Since v3.16 (though universal access was only enabled by default in v4.6),
the kernel has offered the ability to wrap any system memory (i.e. RAM
and not I/O mapped memory) into an object that can be used by the GPU. The
caveat is that this object is marked as cache coherent (so that the client
can continue accessing the memory blissfully ignorant of the
synchronisation required with the GPU) and on !llc platforms this means
that the object is snooped. Snooping imposes a large performance penalty
and is only advised to be used for one-off transfers. However, it provides
another useful tool in the driver toolbox.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c   | 70 +++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_bufmgr.h   |  8 +++
 src/mesa/drivers/dri/i965/intel_screen.c | 20 +++++++
 src/mesa/drivers/dri/i965/intel_screen.h |  1 +
 4 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index f1675b191c1..ea7886d6df5 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -723,6 +723,74 @@ brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
                             flags, tiling, stride);
 }
 
+/*
+ * Wrap the chunk of client memory given by ptr+size inside a GPU
+ * buffer, and make it cache coherent (though on non-LLC architectures
+ * this requires snooping on explicit cache flushes). This allows the
+ * caller to write into the memory chunk and for those writes to be
+ * visible on the GPU (exactly as if they create the buffer and then
+ * persistently mapped it to obtain the pointer).
+ */
+struct brw_bo *
+brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr,
+                     const char *name, void *ptr, uint64_t size)
+{
+   struct brw_bo *bo = calloc(1, sizeof(*bo));
+   if (!bo)
+      return NULL;
+
+   bo->bufmgr = bufmgr;
+   bo->name = name;
+   p_atomic_set(&bo->refcount, 1);
+
+   bo->size = size;
+   bo->map_cpu = ptr;
+   bo->userptr = true;
+   bo->reusable = false;
+   bo->cache_coherent = true;
+   bo->idle = true;
+   bo->kflags = bufmgr->initial_kflags;
+
+   bo->tiling_mode = I915_TILING_NONE;
+   bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+   bo->stride = 0;
+
+   struct drm_i915_gem_userptr arg = {
+      .user_ptr = (uintptr_t)ptr,
+      .user_size = size,
+      .flags = 0,
+   };
+   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) {
+      free(bo);
+      return NULL;
+   }
+
+   bo->gem_handle = arg.handle;
+
+   /* Check the buffer for validity before we try and use it in a batch */
+   struct drm_i915_gem_set_domain sd = {
+      .handle = bo->gem_handle,
+      .read_domains = I915_GEM_DOMAIN_CPU,
+   };
+   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
+      goto err_free;
+
+   if (brw_using_softpin(bufmgr)) {
+      bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, size, 1);
+      if (bo->gtt_offset == 0ull)
+         goto err_free;
+   }
+
+   VG_DEFINED(ptr, size); /* Presume we write to it using the GPU */
+   return bo;
+
+err_free:
+   mtx_lock(&bufmgr->lock);
+   bo_free(bo);
+   mtx_unlock(&bufmgr->lock);
+   return NULL;
+}
+
 /**
  * Returns a brw_bo wrapping the given buffer object handle.
  *
@@ -813,7 +881,7 @@ bo_free(struct brw_bo *bo)
 {
    struct brw_bufmgr *bufmgr = bo->bufmgr;
 
-   if (bo->map_cpu) {
+   if (bo->map_cpu && !bo->userptr) {
       VG_NOACCESS(bo->map_cpu, bo->size);
       drm_munmap(bo->map_cpu, bo->size);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 32fc7a553c9..ba9cf67b2ec 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -193,6 +193,11 @@ struct brw_bo {
     * Boolean of whether this buffer is cache coherent
     */
    bool cache_coherent;
+
+   /**
+    * Boolean of whether this buffer is a userptr
+    */
+   bool userptr:1;
 };
 
 #define BO_ALLOC_BUSY       (1<<0)
@@ -227,6 +232,9 @@ struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr,
                                   uint32_t pitch,
                                   unsigned flags);
 
+struct brw_bo *brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr,
+                                    const char *name, void *ptr, uint64_t size);
+
 /**
  * Allocate a tiled buffer object.
  *
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index f1c195c5d14..1ba2f021fcd 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1889,6 +1889,22 @@ intel_detect_swizzling(struct intel_screen *screen)
    return swizzle_mode != I915_BIT_6_SWIZZLE_NONE;
 }
 
+static bool
+intel_detect_userptr(struct intel_screen *screen)
+{
+   struct drm_i915_gem_userptr arg = {
+      .user_ptr = -4096ULL,
+      .user_size = 8192, /* invalid 64b wrap around */
+   };
+
+   if (screen->devinfo.has_snoop_bug)
+      return false;
+
+   errno = 0;
+   drmIoctl(screen->driScrnPriv->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg);
+   return errno == EFAULT;
+}
+
 static int
 intel_detect_timestamp(struct intel_screen *screen)
 {
@@ -2634,6 +2650,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
       screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST;
    }
 
+   if (intel_detect_userptr(screen)) {
+      screen->kernel_features |= KERNEL_ALLOWS_USERPTR;
+   }
+
    if (!intel_detect_pipelined_so(screen)) {
       /* We can't do anything, so the effective version is 0. */
       screen->cmd_parser_version = 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 8d56fcd9e7a..8a417e419bf 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -82,6 +82,7 @@ struct intel_screen
 #define KERNEL_ALLOWS_EXEC_CAPTURE                  (1<<5)
 #define KERNEL_ALLOWS_EXEC_BATCH_FIRST              (1<<6)
 #define KERNEL_ALLOWS_CONTEXT_ISOLATION             (1<<7)
+#define KERNEL_ALLOWS_USERPTR                       (1<<8)
 
    struct brw_bufmgr *bufmgr;
 
-- 
2.18.0



More information about the mesa-dev mailing list