[Mesa-dev] [PATCH 3/6] i965: Allow creation of brw_bo from system memory (userptr)

Chris Wilson chris at chris-wilson.co.uk
Fri Oct 13 09:34:53 UTC 2017


Since v3.16 (though universal access was only enabled by default in v4.6),
the kernel has offered the ability to wrap any system memory (i.e. RAM
and not I/O mapped memory) into an object that can be used by the GPU. The
caveat is that this object is marked as cache coherent (so that the client
can continue accessing the memory blissfully ignorant of the
synchronisation required with the GPU) and on !llc platforms this means
that the object is snooped. Snooping imposes a large performance penalty
and is only advised to be used for one-off transfers. However, it provides
another useful tool in the driver toolbox.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c   | 67 +++++++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_bufmgr.h   | 11 ++++++
 src/mesa/drivers/dri/i965/intel_screen.c | 20 ++++++++++
 src/mesa/drivers/dri/i965/intel_screen.h |  1 +
 4 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 82bf30727e..f12a3786eb 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -462,6 +462,71 @@ brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
    return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride, 0);
 }
 
+/*
+ * Wrap the client memory [ptr, ptr + size) in a GPU buffer object that is
+ * marked cache coherent (on non-LLC platforms this means the object is
+ * snooped). The caller may keep writing through ptr and those writes are
+ * visible to the GPU, exactly as if it had created the buffer and then
+ * persistently mapped it to obtain the pointer. alignment is only stored
+ * in bo->align here. Returns a bo with refcount 1, or NULL on any failure.
+ */
+struct brw_bo *
+brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr,
+                     const char *name,
+                     void *ptr,
+                     uint64_t size,
+                     uint64_t alignment)
+{
+   struct brw_bo *bo = calloc(1, sizeof(*bo));
+   if (!bo)
+      return NULL;
+
+   bo->bufmgr = bufmgr;
+   bo->name = name; /* pointer is stored; the string is not copied */
+   p_atomic_set(&bo->refcount, 1);
+
+   bo->size = size;
+   bo->align = alignment;
+   bo->map_cpu = ptr; /* the caller's pointer serves as the CPU map */
+   bo->userptr = true;
+   bo->reusable = false;
+   bo->cache_coherent = true;
+   bo->idle = true;
+
+   bo->tiling_mode = I915_TILING_NONE;
+   bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+   bo->stride = 0;
+
+   struct drm_i915_gem_userptr arg = {
+      .user_ptr = (uintptr_t)ptr,
+      .user_size = size,
+      .flags = 0,
+   };
+   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) {
+      free(bo); /* no GEM handle was obtained, so only the struct goes */
+      return NULL;
+   }
+
+   bo->gem_handle = arg.handle;
+
+   /* Check the buffer for validity before we try and use it in a batch */
+   struct drm_i915_gem_set_domain sd = {
+      .handle = bo->gem_handle,
+      .read_domains = I915_GEM_DOMAIN_CPU,
+   };
+   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
+      goto err_free;
+
+   VG_DEFINED(ptr, size); /* Presume we write to it using the GPU */
+   return bo;
+
+err_free:
+   mtx_lock(&bufmgr->lock); /* bo_free() is called with the lock held */
+   bo_free(bo);
+   mtx_unlock(&bufmgr->lock);
+   return NULL;
+}
+
 /**
  * Returns a brw_bo wrapping the given buffer object handle.
  *
@@ -555,7 +620,7 @@ bo_free(struct brw_bo *bo)
    struct drm_gem_close close;
    int ret;
 
-   if (bo->map_cpu) {
+   if (bo->map_cpu && !bo->userptr) {
       VG_NOACCESS(bo->map_cpu, bo->size);
       drm_munmap(bo->map_cpu, bo->size);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index de0ba1dad1..48dc7e2ab3 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -165,6 +165,11 @@ struct brw_bo {
     * Boolean of whether this buffer is cache coherent
     */
    bool cache_coherent;
+
+   /**
+    * Boolean of whether this buffer is a userptr
+    */
+   bool userptr:1;
 };
 
 #define BO_ALLOC_BUSY       (1<<0)
@@ -198,6 +203,12 @@ struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr,
                                   uint32_t pitch,
                                   unsigned flags);
 
+struct brw_bo *brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr,
+                                    const char *name,
+                                    void *ptr,
+                                    uint64_t size,
+                                    uint64_t alignment);
+
 /**
  * Allocate a tiled buffer object.
  *
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index d3bef25cd5..e1e83ddf1e 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1681,6 +1681,22 @@ intel_detect_swizzling(struct intel_screen *screen)
       return true;
 }
 
+static bool
+intel_detect_userptr(struct intel_screen *screen)
+{
+   struct drm_i915_gem_userptr arg = {
+      .user_ptr = -4096ULL, /* final 4KiB of the 64b address space */
+      .user_size = 8192, /* invalid 64b wrap around */
+   };
+   /* Report no userptr support on devices flagged with has_snoop_bug. */
+   if (screen->devinfo.has_snoop_bug)
+      return false;
+   /* Probe with the bogus range; only an EFAULT reply counts as support. */
+   errno = 0;
+   drmIoctl(screen->driScrnPriv->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg);
+   return errno == EFAULT;
+}
+
 static int
 intel_detect_timestamp(struct intel_screen *screen)
 {
@@ -2442,6 +2458,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
       screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST;
    }
 
+   if (intel_detect_userptr(screen)) {
+      screen->kernel_features |= KERNEL_ALLOWS_USERPTR;
+   }
+
    if (!intel_detect_pipelined_so(screen)) {
       /* We can't do anything, so the effective version is 0. */
       screen->cmd_parser_version = 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 41e1dbdd4e..b37c2dc40f 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -81,6 +81,7 @@ struct intel_screen
 #define KERNEL_ALLOWS_COMPUTE_DISPATCH              (1<<4)
 #define KERNEL_ALLOWS_EXEC_CAPTURE                  (1<<5)
 #define KERNEL_ALLOWS_EXEC_BATCH_FIRST              (1<<6)
+#define KERNEL_ALLOWS_USERPTR                       (1<<7)
 
    struct brw_bufmgr *bufmgr;
 
-- 
2.15.0.rc0



More information about the mesa-dev mailing list