[Mesa-dev] [PATCH 15/22] i965: AMD_pinned_memory and userptr
Chris Wilson
chris at chris-wilson.co.uk
Sat Aug 5 09:40:07 UTC 2017
All GEN GPUs can bind to any piece of memory (thanks UMA), and so through
a special ioctl we can map a chunk of page-aligned client memory into
the GPU address space. However, not all GEN are equal. Some have
cache-coherency between the CPU and the GPU, whilst the others are
incoherent and rely on snooping on explicit flushes to push/pull dirty
data. Whereas we can use client buffers as a general replacement for kernel
allocated buffers with LLC (cache coherency), using snooped buffers
behaves differently and so must be used with care.
AMD_pinned_memory supposes that the client memory buffer is suitable
for any general usage (e.g. vertex data, texture data) and so only on
LLC can we offer that extension.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
src/mesa/drivers/dri/i965/brw_bufmgr.c | 68 +++++++++++++++++++++++-
src/mesa/drivers/dri/i965/brw_bufmgr.h | 11 ++++
src/mesa/drivers/dri/i965/intel_buffer_objects.c | 53 ++++++++++++------
src/mesa/drivers/dri/i965/intel_extensions.c | 9 ++++
src/mesa/drivers/dri/i965/intel_screen.c | 17 ++++++
src/mesa/drivers/dri/i965/intel_screen.h | 1 +
6 files changed, 141 insertions(+), 18 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 14e91468d1..8f81a8bd4a 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -481,6 +481,72 @@ brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride, 0);
}
+/*
+ * Wrap the chunk of client memory given by ptr+size inside a GPU
+ * buffer, and make it cache coherent (though on non-LLC architectures
+ * this requires snooping on explicit cache flushes). This allows the
+ * caller to write into the memory chunk and for those writes to be
+ * visible on the GPU (exactly as if they created the buffer and then
+ * persistently mapped it to obtain the pointer). Returns NULL on failure.
+ */
+struct brw_bo *
+brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr,
+ const char *name,
+ void *ptr,
+ uint64_t size,
+ uint64_t alignment)
+{
+ struct brw_bo *bo = calloc(1, sizeof(*bo));
+ if (!bo)
+ return NULL;
+
+ bo->bufmgr = bufmgr;
+ bo->name = name; /* the pointer is stored; the string is not copied */
+ p_atomic_set(&bo->refcount, 1);
+
+ bo->size = size;
+ bo->align = alignment;
+ bo->map_cpu = ptr; /* the client pointer doubles as the CPU mapping */
+ bo->userptr = true; /* bo_free() tests this to skip drm_munmap() of map_cpu */
+ bo->reusable = false;
+ bo->cache_coherent = true;
+ bo->idle = true;
+
+ bo->tiling_mode = I915_TILING_NONE;
+ bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+ bo->stride = 0;
+
+ struct drm_i915_gem_userptr arg = {
+ .user_ptr = (uintptr_t)ptr,
+ .user_size = size,
+ .flags = 0,
+ };
+ if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) {
+ free(bo); /* bo has no gem_handle yet, so only the struct is released */
+ return NULL;
+ }
+
+ bo->gem_handle = arg.handle;
+
+ /* Check the buffer for validity before we try and use it in a batch */
+ if (drmIoctl(bufmgr->fd,
+ DRM_IOCTL_I915_GEM_SET_DOMAIN,
+ &(struct drm_i915_gem_set_domain){
+ .handle = bo->gem_handle,
+ .read_domains = I915_GEM_DOMAIN_CPU, /* write_domain is left zero by the compound literal */
+ }))
+ goto err_free;
+
+ VG_DEFINED(ptr, size); /* Presume we write to it using the GPU */
+ return bo;
+
+err_free:
+ pthread_mutex_lock(&bufmgr->lock);
+ bo_free(bo); /* called with bufmgr->lock held */
+ pthread_mutex_unlock(&bufmgr->lock);
+ return NULL;
+}
+
/**
* Returns a brw_bo wrapping the given buffer object handle.
*
@@ -574,7 +640,7 @@ bo_free(struct brw_bo *bo)
struct drm_gem_close close;
int ret;
- if (bo->map_cpu) {
+ if (bo->map_cpu && !bo->userptr) {
VG_NOACCESS(bo->map_cpu, bo->size);
drm_munmap(bo->map_cpu, bo->size);
}
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 45819c17c5..16c035f47b 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -143,6 +143,11 @@ struct brw_bo {
* Boolean of whether this buffer is cache coherent
*/
bool cache_coherent;
+
+ /**
+ * Boolean of whether this buffer is a userptr
+ */
+ bool userptr:1;
};
#define BO_ALLOC_FOR_RENDER (1<<0)
@@ -176,6 +181,12 @@ struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr,
uint32_t pitch,
unsigned flags);
+struct brw_bo *brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr,
+ const char *name, /* stored as bo->name */
+ void *ptr, /* start of the client memory to wrap */
+ uint64_t size, /* size of that memory; passed to the kernel as user_size */
+ uint64_t alignment); /* stored as bo->align */
+
/**
* Allocate a tiled buffer object.
*
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index ee59116828..2cb123973e 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -74,6 +74,23 @@ mark_buffer_invalid(struct intel_buffer_object *intel_obj)
/** Allocates a new brw_bo to store the data for the buffer object. */
static void
+mark_new_state(struct brw_context *brw,
+ struct intel_buffer_object *intel_obj)
+{
+ /* For each usage recorded in UsageHistory, set the matching BRW_NEW_*
+ * bit in NewDriverState (the buffer might be bound there). */
+ if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+ if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; /* same bit as the uniform case; unchanged from the lines this helper replaces */
+ if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
+ if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER;
+}
+
+/** Allocates a new brw_bo to store the data for the buffer object. */
+static void
alloc_buffer_object(struct brw_context *brw,
struct intel_buffer_object *intel_obj)
{
@@ -98,17 +115,7 @@ alloc_buffer_object(struct brw_context *brw,
}
intel_obj->buffer = brw_bo_alloc(brw->bufmgr, "bufferobj", size, 64);
- /* the buffer might be bound as a uniform buffer, need to update it
- */
- if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER;
-
+ mark_new_state(brw, intel_obj);
mark_buffer_inactive(intel_obj);
mark_buffer_invalid(intel_obj);
}
@@ -206,13 +213,25 @@ brw_buffer_data(struct gl_context *ctx,
release_buffer(intel_obj);
if (size != 0) {
- alloc_buffer_object(brw, intel_obj);
- if (!intel_obj->buffer)
- return false;
-
- if (data != NULL) {
- brw_bo_subdata(intel_obj->buffer, 0, size, data);
+ if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
+ intel_obj->buffer =
+ brw_bo_alloc_userptr(brw->bufmgr, "bufferobj(userptr)",
+ (void *)data, size, 0);
+ if (!intel_obj->buffer)
+ return false;
+
+ mark_buffer_inactive(intel_obj);
mark_buffer_valid_data(intel_obj, 0, size);
+ mark_new_state(brw, intel_obj);
+ } else {
+ alloc_buffer_object(brw, intel_obj);
+ if (!intel_obj->buffer)
+ return false;
+
+ if (data != NULL) {
+ brw_bo_subdata(intel_obj->buffer, 0, size, data);
+ mark_buffer_valid_data(intel_obj, 0, size);
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index b91bbdc8d9..2514712cfa 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -193,6 +193,15 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_timer_query = brw->screen->hw_has_timestamp;
+ /* Flexibility of using client memory for any buffer (incl. vertex
+ * buffers) rules out the prospect of using snooped buffers, and
+ * using snooped buffers without cognisance is likely to be
+ * detrimental to performance anyway.
+ */
+ ctx->Extensions.AMD_pinned_memory =
+ brw->screen->kernel_features & KERNEL_ALLOWS_USERPTR &&
+ brw->screen->devinfo.has_llc;
+
/* Only enable this in core profile because other parts of Mesa behave
* slightly differently when the extension is enabled.
*/
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index ec07cf0acc..3666b65bb6 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1547,6 +1547,19 @@ intel_detect_swizzling(struct intel_screen *screen)
return true;
}
+static bool
+intel_detect_userptr(struct intel_screen *screen)
+{
+ errno = 0; /* so the test below only sees an error from this ioctl */
+ drmIoctl(screen->driScrnPriv->fd, /* return value ignored; errno carries the result */
+ DRM_IOCTL_I915_GEM_USERPTR,
+ &(struct drm_i915_gem_userptr){
+ .user_ptr = -4096ULL, /* 4096 bytes below the top of the 64-bit range */
+ .user_size = 8192, /* so user_ptr + user_size wraps: never a valid range */
+ });
+ return errno == EFAULT; /* presumably a kernel without userptr fails with a different errno -- confirm */
+}
+
static int
intel_detect_timestamp(struct intel_screen *screen)
{
@@ -2271,6 +2284,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST;
}
+ if (intel_detect_userptr(screen)) {
+ screen->kernel_features |= KERNEL_ALLOWS_USERPTR;
+ }
+
if (!intel_detect_pipelined_so(screen)) {
/* We can't do anything, so the effective version is 0. */
screen->cmd_parser_version = 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 41e1dbdd4e..b37c2dc40f 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -81,6 +81,7 @@ struct intel_screen
#define KERNEL_ALLOWS_COMPUTE_DISPATCH (1<<4)
#define KERNEL_ALLOWS_EXEC_CAPTURE (1<<5)
#define KERNEL_ALLOWS_EXEC_BATCH_FIRST (1<<6)
+#define KERNEL_ALLOWS_USERPTR (1<<7)
struct brw_bufmgr *bufmgr;
--
2.13.3
More information about the mesa-dev
mailing list