[Mesa-dev] [PATCH 3/4] i965: Expose AMD_pinned_memory
Chris Wilson
chris at chris-wilson.co.uk
Thu Aug 16 21:02:11 UTC 2018
All GEN GPUs can bind to any piece of memory (thanks UMA), and so through
a special ioctl we can map a chunk of page-aligned client memory into
the GPU address space. However, not all GENs are equal. Some have
cache-coherency between the CPU and the GPU, whilst the others are
incoherent and rely on snooping or explicit flushes to push/pull dirty
data. Whereas we can use client buffers as a general replacement for
kernel-allocated buffers with LLC (cache coherency), using snooped buffers
behaves differently and so must be used with care.
AMD_pinned_memory supposes that the client memory buffer is suitable
for any general usage (e.g. vertex data, texture data) and so only on
LLC can we offer that extension.
---
.../drivers/dri/i965/intel_buffer_objects.c | 68 +++++++++++++------
.../drivers/dri/i965/intel_buffer_objects.h | 6 ++
src/mesa/drivers/dri/i965/intel_extensions.c | 11 +++
3 files changed, 65 insertions(+), 20 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 452e6d33c07..4b34b55793b 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -72,6 +72,23 @@ mark_buffer_invalid(struct intel_buffer_object *intel_obj)
intel_obj->valid_data_end = 0;
}
+/** Flags as dirty the driver state for every binding recorded in the buffer's usage history. */
+static void
+mark_new_state(struct brw_context *brw,
+ struct intel_buffer_object *intel_obj)
+{
+ /* the buffer might be bound as a uniform buffer, need to update it
+ */
+ if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+ if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+ if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
+ if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+}
+
/** Allocates a new brw_bo to store the data for the buffer object. */
static void
alloc_buffer_object(struct brw_context *brw,
@@ -96,20 +113,28 @@ alloc_buffer_object(struct brw_context *brw,
*/
size += 64 * 32; /* max read length of 64 256-bit units */
}
+
+ assert(!intel_obj->pinned);
intel_obj->buffer =
brw_bo_alloc(brw->bufmgr, "bufferobj", size, BRW_MEMZONE_OTHER);
- /* the buffer might be bound as a uniform buffer, need to update it
- */
- if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+ mark_new_state(brw, intel_obj);
+ mark_buffer_inactive(intel_obj);
+ mark_buffer_invalid(intel_obj);
+}
+
+static void
+alloc_userptr_object(struct brw_context *brw,
+ struct intel_buffer_object *intel_obj,
+ GLsizeiptrARB size,
+ const GLvoid *data)
+{
+ intel_obj->buffer =
+ brw_bo_alloc_userptr(brw->bufmgr, "bufferobj(userptr)",
+ (void *)data, size);
+ intel_obj->pinned = true;
+ mark_new_state(brw, intel_obj);
mark_buffer_inactive(intel_obj);
mark_buffer_invalid(intel_obj);
}
@@ -119,6 +144,7 @@ release_buffer(struct intel_buffer_object *intel_obj)
{
brw_bo_unreference(intel_obj->buffer);
intel_obj->buffer = NULL;
+ intel_obj->pinned = false;
}
/**
@@ -192,10 +218,6 @@ brw_buffer_data(struct gl_context *ctx,
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
- /* Part of the ABI, but this function doesn't use it.
- */
- (void) target;
-
intel_obj->Base.Size = size;
intel_obj->Base.Usage = usage;
intel_obj->Base.StorageFlags = storageFlags;
@@ -207,12 +229,16 @@ brw_buffer_data(struct gl_context *ctx,
release_buffer(intel_obj);
if (size != 0) {
- alloc_buffer_object(brw, intel_obj);
+ if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD)
+ alloc_buffer_object(brw, intel_obj);
+ else
+ alloc_userptr_object(brw, intel_obj, size, data);
if (!intel_obj->buffer)
return false;
if (data != NULL) {
- brw_bo_subdata(intel_obj->buffer, 0, size, data);
+ if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD)
+ brw_bo_subdata(intel_obj->buffer, 0, size, data);
mark_buffer_valid_data(intel_obj, 0, size);
}
}
@@ -275,9 +301,10 @@ brw_buffer_subdata(struct gl_context *ctx,
brw_batch_references(&brw->batch, intel_obj->buffer);
if (busy) {
- if (size == intel_obj->Base.Size ||
+ if (!intel_obj->pinned &&
+ (size == intel_obj->Base.Size ||
(intel_obj->valid_data_start >= offset &&
- intel_obj->valid_data_end <= offset + size)) {
+ intel_obj->valid_data_end <= offset + size))) {
/* Replace the current busy bo so the subdata doesn't stall. */
brw_bo_unreference(intel_obj->buffer);
alloc_buffer_object(brw, intel_obj);
@@ -425,7 +452,7 @@ brw_map_buffer_range(struct gl_context *ctx,
*/
if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
if (brw_batch_references(&brw->batch, intel_obj->buffer)) {
- if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
+ if (!intel_obj->pinned && access & GL_MAP_INVALIDATE_BUFFER_BIT) {
brw_bo_unreference(intel_obj->buffer);
alloc_buffer_object(brw, intel_obj);
} else {
@@ -433,7 +460,8 @@ brw_map_buffer_range(struct gl_context *ctx,
"object\n");
intel_batchbuffer_flush(brw);
}
- } else if (brw_bo_busy(intel_obj->buffer) &&
+ } else if (!intel_obj->pinned &&
+ brw_bo_busy(intel_obj->buffer) &&
(access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
brw_bo_unreference(intel_obj->buffer);
alloc_buffer_object(brw, intel_obj);
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
index 849b231c8c0..072f71a0be1 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
@@ -87,6 +87,12 @@ struct intel_buffer_object
* cycle of blitting on buffer wraparound.
*/
bool prefer_stall_to_blit;
+
+ /**
+ * If this buffer wraps a chunk of client memory, we cannot replace
+ * it with another buffer (of video memory) on a whim; it is pinned.
+ */
+ bool pinned;
/** @} */
};
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index f1c3aeff135..f0a425e8981 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -210,6 +210,17 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.EXT_disjoint_timer_query =
ctx->Extensions.ARB_timer_query;
+ /* AMD_pinned_memory assumes the flexibility of using client memory
+ * for any buffer (incl. vertex buffers) which rules out the prospect
+ * of using snooped buffers, as using snooped buffers without
+ * cognizance is likely to be detrimental to performance and require
+ * extensive checking in the driver for correctness, e.g. to prevent
+ * illegal snoop <-> snoop transfers.
+ */
+ ctx->Extensions.AMD_pinned_memory =
+ brw->screen->kernel_features & KERNEL_ALLOWS_USERPTR &&
+ brw->screen->devinfo.has_llc;
+
/* Only enable this in core profile because other parts of Mesa behave
* slightly differently when the extension is enabled.
*/
--
2.18.0
More information about the mesa-dev
mailing list