[Mesa-dev] [PATCH 2/3] winsys/amdgpu: implement sync_file import/export
Marek Olšák
maraeo at gmail.com
Tue Sep 12 20:50:36 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
syncobj is used internally for interactions with command submission.
---
src/gallium/drivers/radeon/radeon_winsys.h | 12 +++
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 115 +++++++++++++++++++++++++++--
src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 18 ++++-
3 files changed, 138 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 99e22e0..2438ec2 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -590,20 +590,32 @@ struct radeon_winsys {
struct pipe_fence_handle *fence,
uint64_t timeout);
/**
* Reference counting for fences.
*/
void (*fence_reference)(struct pipe_fence_handle **dst,
struct pipe_fence_handle *src);
/**
+ * Create a new fence object corresponding to the given sync_file.
+ */
+ struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws,
+ int fd);
+
+ /**
+ * Return a sync_file FD corresponding to the given fence object.
+ */
+ int (*fence_export_sync_file)(struct radeon_winsys *ws,
+ struct pipe_fence_handle *fence);
+
+ /**
* Initialize surface
*
* \param ws The winsys this function is called from.
* \param tex Input texture description
* \param flags Bitmask of RADEON_SURF_* flags
* \param bpe Bytes per pixel, it can be different for Z buffers.
* \param mode Preferred tile mode. (linear, 1D, or 2D)
* \param surf Output structure
*/
int (*surface_init)(struct radeon_winsys *ws,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 768a164..d9d2a8b 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -40,30 +40,86 @@ DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
/* FENCES */
static struct pipe_fence_handle *
amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
unsigned ip_instance, unsigned ring)
{
struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
fence->reference.count = 1;
+ fence->ws = ctx->ws;
fence->ctx = ctx;
fence->fence.context = ctx->ctx;
fence->fence.ip_type = ip_type;
fence->fence.ip_instance = ip_instance;
fence->fence.ring = ring;
fence->submission_in_progress = true;
p_atomic_inc(&ctx->refcount);
return (struct pipe_fence_handle *)fence;
}
+static struct pipe_fence_handle *
+amdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd)
+{
+ struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+ struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
+
+ if (!fence)
+ return NULL;
+
+ pipe_reference_init(&fence->reference, 1);
+ fence->ws = ws;
+ /* fence->ctx == NULL means that the fence is syncobj-based. */
+
+ /* Convert sync_file into syncobj. */
+ int r = amdgpu_cs_create_syncobj(ws->dev, &fence->syncobj);
+ if (r) {
+ FREE(fence);
+ return NULL;
+ }
+
+ r = amdgpu_cs_syncobj_import_sync_file(ws->dev, fence->syncobj, fd);
+ if (r) {
+ amdgpu_cs_destroy_syncobj(ws->dev, fence->syncobj);
+ FREE(fence);
+ return NULL;
+ }
+ return (struct pipe_fence_handle*)fence;
+}
+
+static int amdgpu_fence_export_sync_file(struct radeon_winsys *rws,
+ struct pipe_fence_handle *pfence)
+{
+ struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+ struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence;
+
+ if (amdgpu_fence_is_syncobj(fence)) {
+ int fd, r;
+
+ /* Convert syncobj into sync_file. */
+ r = amdgpu_cs_syncobj_export_sync_file(ws->dev, fence->syncobj, &fd);
+ return r ? -1 : fd;
+ }
+
+ os_wait_until_zero(&fence->submission_in_progress, PIPE_TIMEOUT_INFINITE);
+
+ /* Convert the amdgpu fence into a fence FD. */
+ int fd;
+ if (amdgpu_cs_fence_to_handle(ws->dev, &fence->fence,
+ AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD,
+ (uint32_t*)&fd))
+ return -1;
+
+ return fd;
+}
+
static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
uint64_t seq_no,
uint64_t *user_fence_cpu_address)
{
struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
rfence->fence.fence = seq_no;
rfence->user_fence_cpu_address = user_fence_cpu_address;
rfence->submission_in_progress = false;
}
@@ -81,20 +137,35 @@ bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
{
struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
uint32_t expired;
int64_t abs_timeout;
uint64_t *user_fence_cpu;
int r;
if (rfence->signalled)
return true;
+ /* Handle syncobjs. */
+ if (amdgpu_fence_is_syncobj(rfence)) {
+ /* Absolute timeouts are only used by BO fences, which aren't
+ * backed by syncobjs.
+ */
+ assert(!absolute);
+
+ if (amdgpu_cs_syncobj_wait(rfence->ws->dev, &rfence->syncobj, 1,
+ timeout, 0, NULL))
+ return false;
+
+ rfence->signalled = true;
+ return true;
+ }
+
if (absolute)
abs_timeout = timeout;
else
abs_timeout = os_time_get_absolute_timeout(timeout);
/* The fence might not have a number assigned if its IB is being
* submitted in the other thread right now. Wait until the submission
* is done. */
if (!os_wait_until_zero_abs_timeout(&rfence->submission_in_progress,
abs_timeout))
@@ -921,21 +992,22 @@ static unsigned add_fence_dependency_entry(struct amdgpu_cs_context *cs)
increment * sizeof(cs->fence_dependencies[0]));
}
return idx;
}
static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
struct amdgpu_fence *fence)
{
struct amdgpu_cs_context *cs = acs->csc;
- if (fence->ctx == acs->ctx &&
+ if (!amdgpu_fence_is_syncobj(fence) &&
+ fence->ctx == acs->ctx &&
fence->fence.ip_type == cs->ib[IB_MAIN].ip_type &&
fence->fence.ip_instance == cs->ib[IB_MAIN].ip_instance &&
fence->fence.ring == cs->ib[IB_MAIN].ring)
return true;
return amdgpu_fence_wait((void *)fence, 0, false);
}
static void amdgpu_cs_add_fence_dependency(struct radeon_winsys_cs *rws,
struct pipe_fence_handle *pfence)
@@ -1174,21 +1246,21 @@ bo_list_error:
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
amdgpu_fence_signalled(cs->fence);
cs->error_code = r;
goto cleanup;
}
if (acs->ctx->num_rejected_cs) {
r = -ECANCELED;
} else {
- struct drm_amdgpu_cs_chunk chunks[3];
+ struct drm_amdgpu_cs_chunk chunks[4];
unsigned num_chunks = 0;
/* Convert from dwords to bytes. */
cs->ib[IB_MAIN].ib_bytes *= 4;
/* IB */
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
num_chunks++;
@@ -1196,38 +1268,69 @@ bo_list_error:
/* Fence */
if (has_user_fence) {
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk;
num_chunks++;
}
/* Dependencies */
unsigned num_dependencies = cs->num_fence_dependencies;
+ unsigned num_syncobj_dependencies = 0;
+
if (num_dependencies) {
struct drm_amdgpu_cs_chunk_dep *dep_chunk =
alloca(num_dependencies * sizeof(*dep_chunk));
+ unsigned num = 0;
for (unsigned i = 0; i < num_dependencies; i++) {
struct amdgpu_fence *fence =
(struct amdgpu_fence*)cs->fence_dependencies[i];
+ if (amdgpu_fence_is_syncobj(fence)) {
+ num_syncobj_dependencies++;
+ continue;
+ }
+
assert(!fence->submission_in_progress);
- amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
+ amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[num++]);
}
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
- chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 *
- num_dependencies;
+ chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num;
chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
num_chunks++;
}
+
+ /* Syncobj dependencies. */
+ if (num_syncobj_dependencies) {
+ struct drm_amdgpu_cs_chunk_sem *sem_chunk =
+ alloca(num_syncobj_dependencies * sizeof(sem_chunk[0]));
+ unsigned num = 0;
+
+ for (unsigned i = 0; i < num_dependencies; i++) {
+ struct amdgpu_fence *fence =
+ (struct amdgpu_fence*)cs->fence_dependencies[i];
+
+ if (!amdgpu_fence_is_syncobj(fence))
+ continue;
+
+ assert(!fence->submission_in_progress);
+ sem_chunk[num++].handle = fence->syncobj;
+ }
+
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
+ chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num;
+ chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
+ num_chunks++;
+ }
+
assert(num_chunks <= ARRAY_SIZE(chunks));
r = amdgpu_cs_submit_raw(ws->dev, acs->ctx->ctx, bo_list,
num_chunks, chunks, &seq_no);
}
cs->error_code = r;
if (r) {
if (r == -ENOMEM)
fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
@@ -1423,11 +1526,13 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
ws->base.cs_validate = amdgpu_cs_validate;
ws->base.cs_check_space = amdgpu_cs_check_space;
ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
ws->base.cs_flush = amdgpu_cs_flush;
ws->base.cs_get_next_fence = amdgpu_cs_get_next_fence;
ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
ws->base.cs_add_fence_dependency = amdgpu_cs_add_fence_dependency;
ws->base.fence_wait = amdgpu_fence_wait_rel_timeout;
ws->base.fence_reference = amdgpu_fence_reference;
+ ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file;
+ ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file;
}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index de00912..21e1354 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -132,49 +132,63 @@ struct amdgpu_cs {
/* Flush CS. */
void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
void *flush_data;
struct util_queue_fence flush_completed;
struct pipe_fence_handle *next_fence;
};
struct amdgpu_fence {
struct pipe_reference reference;
+ /* If ctx == NULL, this fence is syncobj-based. */
+ uint32_t syncobj;
+ struct amdgpu_winsys *ws;
struct amdgpu_ctx *ctx; /* submission context */
struct amdgpu_cs_fence fence;
uint64_t *user_fence_cpu_address;
/* If the fence is unknown due to an IB still being submitted
* in the other thread. */
volatile int submission_in_progress; /* bool (int for atomicity) */
volatile int signalled; /* bool (int for atomicity) */
};
+static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
+{
+ return fence->ctx == NULL;
+}
+
static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
{
if (p_atomic_dec_zero(&ctx->refcount)) {
amdgpu_cs_ctx_free(ctx->ctx);
amdgpu_bo_free(ctx->user_fence_bo);
FREE(ctx);
}
}
static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
struct pipe_fence_handle *src)
{
struct amdgpu_fence **rdst = (struct amdgpu_fence **)dst;
struct amdgpu_fence *rsrc = (struct amdgpu_fence *)src;
if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
- amdgpu_ctx_unref((*rdst)->ctx);
- FREE(*rdst);
+ struct amdgpu_fence *fence = *rdst;
+
+ if (amdgpu_fence_is_syncobj(fence))
+ amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj);
+ else
+ amdgpu_ctx_unref(fence->ctx);
+
+ FREE(fence);
}
*rdst = rsrc;
}
int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo);
static inline struct amdgpu_ib *
amdgpu_ib(struct radeon_winsys_cs *base)
{
return (struct amdgpu_ib *)base;
--
2.7.4
More information about the mesa-dev
mailing list