[Mesa-dev] [PATCH 18/18] i965: AMD_pinned_memory and userptr
Chris Wilson
chris at chris-wilson.co.uk
Mon Jul 6 03:33:23 PDT 2015
All GEN GPUs can bind to any piece of memory (thanks to UMA), and so through
a special ioctl we can map a chunk of page-aligned client memory into
the GPU address space. However, not all GEN are equal. Some have
cache-coherency between the CPU and the GPU, whilst the others are
incoherent and rely on snooping on explicit flushes to push/pull dirty
data. Whereas we can use client buffers as a general replacement for
kernel-allocated buffers with LLC (cache coherency), snooped buffers
behave differently and so must be used with care.
AMD_pinned_memory supposes that the client memory buffer is suitable
for any general usage (e.g. vertex data, texture data) and so only on
LLC can we offer that extension.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
src/mesa/drivers/dri/i965/brw_batch.c | 36 +++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_batch.h | 8 +++++
src/mesa/drivers/dri/i965/intel_buffer_objects.c | 40 +++++++++++++++++-------
src/mesa/drivers/dri/i965/intel_extensions.c | 8 +++++
src/mesa/drivers/dri/i965/intel_screen.c | 14 +++++++++
src/mesa/drivers/dri/i965/intel_screen.h | 2 +-
6 files changed, 96 insertions(+), 12 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index e01a0c4..1f73148 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -1323,6 +1323,42 @@ struct brw_bo *brw_bo_create(struct brw_batch *batch,
return bo;
}
+/*
+ * Wrap the chunk of client memory given by ptr+size inside a GPU
+ * buffer, and make it cache coherent (though on non-LLC architectures
+ * this requires snooping on explicit cache flushes). This allows the
+ * caller to write into the memory chunk and for those writes to be
+ * visible on the GPU (exactly as if they had created the buffer and then
+ * persistently mapped it to obtain the pointer). Returns NULL on failure.
+ */
+struct brw_bo *brw_bo_create_userptr(struct brw_batch *batch,
+ const char *name,
+ void *ptr,
+ uint64_t size,
+ uint64_t alignment)
+{
+ drm_intel_bo *base;
+ struct brw_bo *bo;
+
+ base = drm_intel_bo_alloc_userptr(batch->bufmgr, name,
+ ptr, I915_TILING_NONE, 0, size, 0);
+ if (base == NULL)
+ return NULL;
+
+ base->align = alignment;
+ bo = brw_bo_import(batch, base, false);
+ if (bo == NULL) {
+ drm_intel_bo_unreference(base); /* import failed: drop the reference taken by the allocation above */
+ return NULL;
+ }
+
+ bo->cache_coherent = true;
+ bo->reusable = false;
+ list_move(&bo->link, &bo->batch->inactive); /* move the new bo onto its batch's inactive list */
+
+ return bo;
+}
+
static bool __brw_bo_set_caching(struct brw_bo *bo, int caching)
{
struct drm_i915_gem_caching arg;
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 3628b03..3ee9b20 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -215,6 +215,14 @@ brw_bo_create_tiled(struct brw_batch *batch,
uint32_t *pitch,
unsigned flags);
+/* Create a local brw_bo for GPU access to the client memory described by ptr and size */
+struct brw_bo *
+brw_bo_create_userptr(struct brw_batch *batch,
+ const char *name,
+ void *ptr,
+ uint64_t size,
+ uint64_t alignment);
+
/* Create a local brw_bo for a foreign buffer using its global flinked name */
struct brw_bo *brw_bo_create_from_name(struct brw_batch *batch,
const char *name,
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 7110ce6..03d0982 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -58,14 +58,10 @@ mark_buffer_inactive(struct intel_buffer_object *intel_obj)
intel_obj->gpu_active_end = 0;
}
-/** Allocates a new brw_bo to store the data for the buffer object. */
static void
-alloc_buffer_object(struct brw_context *brw,
- struct intel_buffer_object *intel_obj)
+mark_new_state(struct brw_context *brw,
+ struct intel_buffer_object *intel_obj)
{
- intel_obj->buffer =
- brw_bo_create(&brw->batch, "bufferobj", intel_obj->Base.Size, 64, 0);
-
/* the buffer might be bound as a uniform buffer, need to update it
*/
if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
@@ -74,8 +70,18 @@ alloc_buffer_object(struct brw_context *brw,
brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER;
+}
+
+/** Allocates a new brw_bo to store the data for the buffer object. */
+static void
+alloc_buffer_object(struct brw_context *brw,
+ struct intel_buffer_object *intel_obj)
+{
+ intel_obj->buffer =
+ brw_bo_create(&brw->batch, "bufferobj", intel_obj->Base.Size, 64, 0);
mark_buffer_inactive(intel_obj);
+ mark_new_state(brw, intel_obj);
}
static void
@@ -170,12 +176,24 @@ brw_buffer_data(struct gl_context *ctx,
release_buffer(intel_obj);
if (size != 0) {
- alloc_buffer_object(brw, intel_obj);
- if (!intel_obj->buffer)
- return false;
+ if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
+ intel_obj->buffer =
+ brw_bo_create_userptr(&brw->batch, "bufferobj(userptr)",
+ (void *)data, size, 0);
+ if (!intel_obj->buffer)
+ return false;
+ } else {
+ intel_obj->buffer =
+ brw_bo_create(&brw->batch, "bufferobj", size, 64, 0);
+ if (!intel_obj->buffer)
+ return false;
+
+ if (data != NULL)
+ brw_bo_write(intel_obj->buffer, 0, data, size, 0);
+ }
- if (data != NULL)
- brw_bo_write(intel_obj->buffer, 0, data, size, 0);
+ mark_buffer_inactive(intel_obj);
+ mark_new_state(brw, intel_obj);
}
return true;
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index a7b6056..3b9d17f 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -303,6 +303,14 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.EXT_transform_feedback = true;
ctx->Extensions.OES_depth_texture_cube_map = true;
+ ctx->Extensions.AMD_pinned_memory =
+ /* Flexibility of using client memory for any buffer (incl. vertex
+ * buffers) rules out the prospect of using snooped buffers, and
+ * using snooped buffers without cognizance is likely to be
+ * detrimental to performance anyway.
+ */
+ brw->has_llc && brw->intelScreen->hw_has_userptr;
+
ctx->Extensions.ARB_timer_query = brw->intelScreen->hw_has_timestamp;
/* Only enable this in core profile because other parts of Mesa behave
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index fcf50fc..e587283 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1142,6 +1142,19 @@ intel_detect_swizzling(struct intel_screen *screen)
}
static bool
+intel_detect_userptr(struct intel_screen *screen) /* probe whether the userptr ioctl is usable on this screen's fd */
+{
+ struct drm_i915_gem_userptr arg;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.user_ptr = -4096ULL; /* 4096 below the top of the unsigned long long range */
+ arg.user_size = 8192; /* size runs past the top of that range; NOTE(review): presumably a deliberately invalid range - confirm */
+ errno = 0; /* clear so a stale errno is not mistaken for the probe's result */
+ drmIoctl(intel_screen_to_fd(screen), DRM_IOCTL_I915_GEM_USERPTR, &arg); /* return value ignored; only errno is inspected */
+ return errno == EFAULT; /* supported only if the probe failed with EFAULT; any other outcome reports false */
+}
+
+static bool
intel_detect_timestamp(struct intel_screen *screen)
{
uint64_t dummy = 0;
@@ -1411,6 +1424,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
+ intelScreen->hw_has_userptr = intel_detect_userptr(intelScreen);
const char *force_msaa = getenv("INTEL_FORCE_MSAA");
if (force_msaa) {
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 733654c..49b502d 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -55,8 +55,8 @@ struct intel_screen
bool hw_must_use_separate_stencil;
bool hw_has_swizzling;
-
bool hw_has_timestamp;
+ bool hw_has_userptr;
/**
* Does the kernel support context reset notifications?
--
2.1.4
More information about the mesa-dev
mailing list