[PATCH 06/10] amdgpu: add semaphore support
Marek Olšák
maraeo at gmail.com
Tue Jan 12 13:23:52 PST 2016
From: Chunming Zhou <david1.zhou at amd.com>
The semaphore is a binary semaphore. The workflow is:
1. Create the semaphore.
2. Signal the semaphore.
3. Wait on the semaphore; the signalled semaphore is reset afterwards.
4. Destroy the semaphore.
Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
---
amdgpu/amdgpu.h | 65 +++++++++++++++++++
amdgpu/amdgpu_cs.c | 166 +++++++++++++++++++++++++++++++++++++++++++++--
amdgpu/amdgpu_internal.h | 12 ++++
3 files changed, 239 insertions(+), 4 deletions(-)
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 9ae6ca3..8822a0c 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -124,6 +124,11 @@ typedef struct amdgpu_bo_list *amdgpu_bo_list_handle;
*/
typedef struct amdgpu_va *amdgpu_va_handle;
+/**
+ * Define handle for semaphore
+ *
+ * A handle is obtained from amdgpu_cs_create_semaphore() and released
+ * with amdgpu_cs_destroy_semaphore().
+ */
+typedef struct amdgpu_semaphore *amdgpu_semaphore_handle;
+
/*--------------------------------------------------------------------------*/
/* -------------------------- Structures ---------------------------------- */
/*--------------------------------------------------------------------------*/
@@ -1202,4 +1207,64 @@ int amdgpu_bo_va_op(amdgpu_bo_handle bo,
uint64_t flags,
uint32_t ops);
+/**
+ * Create a semaphore
+ *
+ * The new semaphore starts out unsignaled and must eventually be released
+ * with amdgpu_cs_destroy_semaphore().
+ *
+ * \param sem - \c [out] semaphore handle
+ *
+ * \return 0 on success\n
+ * <0 - Negative POSIX Error code\n
+ * -EINVAL if \c sem is NULL\n
+ * -ENOMEM if the semaphore could not be allocated
+ *
+*/
+int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem);
+
+/**
+ * Signal a semaphore
+ *
+ * Records the most recent command submission made on the given ring of
+ * \c ctx as the event that signals the semaphore. A semaphore that is
+ * already signaled cannot be signaled again until it has been reset by a
+ * consumed wait.
+ *
+ * \param ctx - \c [in] GPU Context
+ * \param ip_type - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
+ * \param ip_instance - \c [in] Index of the IP block of the same type
+ * \param ring - \c [in] Specify ring index of the IP
+ * \param sem - \c [in] semaphore handle
+ *
+ * \return 0 on success\n
+ * <0 - Negative POSIX Error code\n
+ * -EINVAL if \c ctx or \c sem is NULL, if \c ip_type or \c ring is out
+ * of range, or if the semaphore is already signaled
+ *
+*/
+int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
+ uint32_t ip_type,
+ uint32_t ip_instance,
+ uint32_t ring,
+ amdgpu_semaphore_handle sem);
+
+/**
+ * Wait on a semaphore
+ *
+ * Queues the semaphore on the given ring of \c ctx. The next command
+ * submission to that ring is made dependent on the submission recorded
+ * when the semaphore was signaled; at that point the semaphore is reset
+ * and may be signaled again.
+ *
+ * \param ctx - \c [in] GPU Context
+ * \param ip_type - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
+ * \param ip_instance - \c [in] Index of the IP block of the same type
+ * \param ring - \c [in] Specify ring index of the IP
+ * \param sem - \c [in] semaphore handle
+ *
+ * \return 0 on success\n
+ * <0 - Negative POSIX Error code\n
+ * -EINVAL if \c ctx or \c sem is NULL, if \c ip_type or \c ring is out
+ * of range, or if the semaphore has not been signaled yet
+ *
+*/
+int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
+ uint32_t ip_type,
+ uint32_t ip_instance,
+ uint32_t ring,
+ amdgpu_semaphore_handle sem);
+
+/**
+ * Destroy a semaphore
+ *
+ * Drops the caller's reference to the semaphore; the memory is released
+ * once no reference remains.
+ *
+ * \param sem - \c [in] semaphore handle
+ *
+ * \return 0 on success\n
+ * <0 - Negative POSIX Error code\n
+ * -EINVAL if \c sem is NULL
+ *
+*/
+int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
+
#endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index d5e4ea0..d033f8e 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -40,6 +40,9 @@
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
+static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
+static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
+
/**
* Create command submission context
*
@@ -53,6 +56,7 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
{
struct amdgpu_context *gpu_context;
union drm_amdgpu_ctx args;
+ int i, j, k;
int r;
if (NULL == dev)
@@ -78,6 +82,10 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
goto error;
gpu_context->id = args.out.alloc.ctx_id;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
+ for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
+ for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
+ list_inithead(&gpu_context->sem_list[i][j][k]);
*context = (amdgpu_context_handle)gpu_context;
return 0;
@@ -99,6 +107,7 @@ error:
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
union drm_amdgpu_ctx args;
+ int i, j, k;
int r;
if (NULL == context)
@@ -112,7 +121,18 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context)
args.in.ctx_id = context->id;
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
&args, sizeof(args));
-
+ for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
+ for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
+ for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
+ amdgpu_semaphore_handle sem;
+ LIST_FOR_EACH_ENTRY(sem, &context->sem_list[i][j][k], list) {
+ list_del(&sem->list);
+ amdgpu_cs_reset_sem(sem);
+ amdgpu_cs_unreference_sem(sem);
+ }
+ }
+ }
+ }
free(context);
return r;
@@ -157,7 +177,10 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
struct drm_amdgpu_cs_chunk *chunks;
struct drm_amdgpu_cs_chunk_data *chunk_data;
struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
- uint32_t i, size;
+ struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
+ struct list_head *sem_list;
+ amdgpu_semaphore_handle sem;
+ uint32_t i, size, sem_count = 0;
bool user_fence;
int r = 0;
@@ -169,7 +192,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
return -EINVAL;
user_fence = (ibs_request->fence_info.handle != NULL);
- size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);
+ size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
chunk_array = alloca(sizeof(uint64_t) * size);
chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
@@ -249,16 +272,49 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
}
+ sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
+ LIST_FOR_EACH_ENTRY(sem, sem_list, list)
+ sem_count++;
+ if (sem_count) {
+ sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
+ if (!sem_dependencies) {
+ r = -ENOMEM;
+ goto error_unlock;
+ }
+ sem_count = 0;
+ LIST_FOR_EACH_ENTRY(sem, sem_list, list) {
+ struct amdgpu_cs_fence *info = &sem->signal_fence;
+ struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
+ dep->ip_type = info->ip_type;
+ dep->ip_instance = info->ip_instance;
+ dep->ring = info->ring;
+ dep->ctx_id = info->context->id;
+ dep->handle = info->fence;
+
+ list_del(&sem->list);
+ amdgpu_cs_reset_sem(sem);
+ amdgpu_cs_unreference_sem(sem);
+ }
+ i = cs.in.num_chunks++;
+
+ /* dependencies chunk */
+ chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
+ chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
+ chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
+ chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
+ }
+
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
&cs, sizeof(cs));
if (r)
goto error_unlock;
ibs_request->seq_no = cs.out.handle;
-
+ context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
error_unlock:
pthread_mutex_unlock(&context->sequence_mutex);
free(dependencies);
+ free(sem_dependencies);
return r;
}
@@ -450,3 +506,105 @@ int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
return r;
}
+
+/**
+ * Allocate a zero-initialised semaphore and hand it back through \p sem.
+ *
+ * The zeroed signal fence (NULL context) is what marks the semaphore as
+ * "not signaled" for the signal/wait/reset helpers below.
+ *
+ * \param sem - \c [out] receives the new semaphore handle
+ *
+ * \return 0 on success, -EINVAL if \p sem is NULL, -ENOMEM if the
+ *         allocation fails
+ */
+int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
+{
+	struct amdgpu_semaphore *new_sem;
+
+	if (!sem)
+		return -EINVAL;
+
+	new_sem = calloc(1, sizeof(*new_sem));
+	if (!new_sem)
+		return -ENOMEM;
+
+	/* The creator holds the first reference. */
+	atomic_set(&new_sem->refcount, 1);
+
+	*sem = new_sem;
+	return 0;
+}
+
+/**
+ * Mark \p sem as signaled by the latest submission on a ring of \p ctx.
+ *
+ * The semaphore's signal fence is filled with the context, the ring
+ * coordinates and the last sequence number recorded for that ring.
+ *
+ * \param ctx - \c [in] GPU context whose ring signals the semaphore
+ * \param ip_type - \c [in] hardware IP block type, < AMDGPU_HW_IP_NUM
+ * \param ip_instance - \c [in] IP instance index,
+ *                      < AMDGPU_HW_IP_INSTANCE_MAX_COUNT
+ * \param ring - \c [in] ring index, < AMDGPU_CS_MAX_RINGS
+ * \param sem - \c [in] semaphore to signal; must not already be signaled
+ *
+ * \return 0 on success, -EINVAL on a NULL handle, an out-of-range index
+ *         or an already signaled semaphore
+ */
+int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
+			       uint32_t ip_type,
+			       uint32_t ip_instance,
+			       uint32_t ring,
+			       amdgpu_semaphore_handle sem)
+{
+	if (NULL == ctx)
+		return -EINVAL;
+	if (ip_type >= AMDGPU_HW_IP_NUM)
+		return -EINVAL;
+	/*
+	 * ip_instance indexes the middle dimension of ctx->last_seq, so it
+	 * has to be range-checked just like ip_type and ring.
+	 */
+	if (ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
+		return -EINVAL;
+	if (ring >= AMDGPU_CS_MAX_RINGS)
+		return -EINVAL;
+	if (NULL == sem)
+		return -EINVAL;
+	/* sem has been signaled */
+	if (sem->signal_fence.context)
+		return -EINVAL;
+
+	/* sequence_mutex keeps last_seq stable while it is sampled. */
+	pthread_mutex_lock(&ctx->sequence_mutex);
+	sem->signal_fence.context = ctx;
+	sem->signal_fence.ip_type = ip_type;
+	sem->signal_fence.ip_instance = ip_instance;
+	sem->signal_fence.ring = ring;
+	sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
+	/*
+	 * NOTE(review): presumably takes an extra reference on the semaphore
+	 * for the pending signal (dropped again when the wait is consumed) -
+	 * confirm against update_references().
+	 */
+	update_references(NULL, &sem->refcount);
+	pthread_mutex_unlock(&ctx->sequence_mutex);
+	return 0;
+}
+
+/**
+ * Queue a wait on \p sem for a ring of \p ctx.
+ *
+ * The semaphore is linked into the per-ring list of the context; the
+ * list is consumed when the next command submission for that ring is
+ * built.
+ *
+ * \param ctx - \c [in] GPU context whose ring has to wait
+ * \param ip_type - \c [in] hardware IP block type, < AMDGPU_HW_IP_NUM
+ * \param ip_instance - \c [in] IP instance index,
+ *                      < AMDGPU_HW_IP_INSTANCE_MAX_COUNT
+ * \param ring - \c [in] ring index, < AMDGPU_CS_MAX_RINGS
+ * \param sem - \c [in] semaphore to wait on; must already be signaled
+ *
+ * \return 0 on success, -EINVAL on a NULL handle, an out-of-range index
+ *         or a semaphore that has not been signaled
+ */
+int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
+			     uint32_t ip_type,
+			     uint32_t ip_instance,
+			     uint32_t ring,
+			     amdgpu_semaphore_handle sem)
+{
+	if (NULL == ctx)
+		return -EINVAL;
+	if (ip_type >= AMDGPU_HW_IP_NUM)
+		return -EINVAL;
+	/*
+	 * ip_instance indexes the middle dimension of ctx->sem_list, so it
+	 * has to be range-checked just like ip_type and ring.
+	 */
+	if (ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
+		return -EINVAL;
+	if (ring >= AMDGPU_CS_MAX_RINGS)
+		return -EINVAL;
+	if (NULL == sem)
+		return -EINVAL;
+	/* must signal first */
+	if (NULL == sem->signal_fence.context)
+		return -EINVAL;
+
+	/* The per-ring wait lists are protected by sequence_mutex. */
+	pthread_mutex_lock(&ctx->sequence_mutex);
+	list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
+	pthread_mutex_unlock(&ctx->sequence_mutex);
+	return 0;
+}
+
+/**
+ * Put a signaled semaphore back into the unsignaled state by clearing
+ * every field of its signal fence.
+ *
+ * \return 0 on success, -EINVAL if \p sem is NULL or is not currently
+ *         signaled
+ */
+static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
+{
+	struct amdgpu_cs_fence *fence;
+
+	if (!sem || !sem->signal_fence.context)
+		return -EINVAL;
+
+	fence = &sem->signal_fence;
+	fence->context = NULL;
+	fence->ip_type = 0;
+	fence->ip_instance = 0;
+	fence->ring = 0;
+	fence->fence = 0;
+
+	return 0;
+}
+
+/**
+ * Drop one reference on \p sem and free it when update_references()
+ * reports that it should be released.
+ *
+ * \return 0 on success, -EINVAL if \p sem is NULL
+ */
+static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
+{
+	bool release;
+
+	if (!sem)
+		return -EINVAL;
+
+	release = update_references(&sem->refcount, NULL);
+	if (release)
+		free(sem);
+
+	return 0;
+}
+
+/**
+ * Public entry point for releasing a semaphore: gives up the caller's
+ * reference via amdgpu_cs_unreference_sem().
+ *
+ * \return the result of amdgpu_cs_unreference_sem()
+ */
+int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
+{
+	int r;
+
+	r = amdgpu_cs_unreference_sem(sem);
+	return r;
+}
+
+
diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h
index 5d86603..557ba1f 100644
--- a/amdgpu/amdgpu_internal.h
+++ b/amdgpu/amdgpu_internal.h
@@ -116,6 +116,18 @@ struct amdgpu_context {
pthread_mutex_t sequence_mutex;
/* context id*/
uint32_t id;
+ uint64_t last_seq[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
+ struct list_head sem_list[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
+};
+
+/**
+ * Structure describing sw semaphore based on scheduler
+ *
+ * The semaphore counts as signaled while signal_fence.context is
+ * non-NULL; resetting it clears signal_fence again.
+ *
+ */
+struct amdgpu_semaphore {
+ atomic_t refcount; /* set to 1 on creation; the object is freed when the last reference is dropped */
+ struct list_head list; /* links the semaphore into a context's per-ring sem_list while a wait is queued */
+ struct amdgpu_cs_fence signal_fence; /* context, ring and sequence number recorded when the semaphore was signaled */
};
/**
--
2.1.4
More information about the dri-devel
mailing list