[PATCH libdrm] amdgpu: revert semaphore support
Mao, David
David.Mao at amd.com
Tue Jul 11 09:30:13 UTC 2017
This is not true.
Can we keep this for a while, until we have sync object support in place?
Thanks.
Best Regards,
David
> On 11 Jul 2017, at 5:28 PM, Christian König <deathsimple at vodafone.de> wrote:
>
> I hoped that Dave Airlied will land it together with this patch.
>
> As far as I know, the closed source driver no longer uses that either.
>
> Regards,
> Christian.
>
> Am 11.07.2017 um 11:20 schrieb Mao, David:
>> Hi Christian,
>> When will sync object support land in the upstream kernel, and in which version specifically?
>> We still rely on the legacy semaphore implementation, and we will have to keep using it if sync object support still takes time.
>> Thanks.
>> Best Regards,
>> David
>>> On 11 Jul 2017, at 5:15 PM, Christian König <deathsimple at vodafone.de> wrote:
>>>
>>> From: Christian König <christian.koenig at amd.com>
>>>
>>> This reverts commit 6b79c66b841dded6ffa6b56f14e4eb10a90a7c07
>>> and commit 6afadeaf13279fcdbc48999f522e1dc90a9dfdaf.
>>>
>>> Semaphore support was never used by any open source project and
>>> not even widely by any closed source driver.
>>>
>>> This should be replaced by sync object support.
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> ---
>>> amdgpu/amdgpu.h | 65 -------------
>>> amdgpu/amdgpu_cs.c | 237 +----------------------------------------------
>>> amdgpu/amdgpu_internal.h | 15 ---
>>> 3 files changed, 5 insertions(+), 312 deletions(-)
>>>
>>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>>> index 1901fa8..8bf57a4 100644
>>> --- a/amdgpu/amdgpu.h
>>> +++ b/amdgpu/amdgpu.h
>>> @@ -128,11 +128,6 @@ typedef struct amdgpu_bo_list *amdgpu_bo_list_handle;
>>> */
>>> typedef struct amdgpu_va *amdgpu_va_handle;
>>>
>>> -/**
>>> - * Define handle for semaphore
>>> - */
>>> -typedef struct amdgpu_semaphore *amdgpu_semaphore_handle;
>>> -
>>> /*--------------------------------------------------------------------------*/
>>> /* -------------------------- Structures ---------------------------------- */
>>> /*--------------------------------------------------------------------------*/
>>> @@ -1259,66 +1254,6 @@ int amdgpu_bo_va_op_raw(amdgpu_device_handle dev,
>>> uint32_t ops);
>>>
>>> /**
>>> - * create semaphore
>>> - *
>>> - * \param sem - \c [out] semaphore handle
>>> - *
>>> - * \return 0 on success\n
>>> - * <0 - Negative POSIX Error code
>>> - *
>>> -*/
>>> -int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem);
>>> -
>>> -/**
>>> - * signal semaphore
>>> - *
>>> - * \param context - \c [in] GPU Context
>>> - * \param ip_type - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
>>> - * \param ip_instance - \c [in] Index of the IP block of the same type
>>> - * \param ring - \c [in] Specify ring index of the IP
>>> - * \param sem - \c [in] semaphore handle
>>> - *
>>> - * \return 0 on success\n
>>> - * <0 - Negative POSIX Error code
>>> - *
>>> -*/
>>> -int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
>>> - uint32_t ip_type,
>>> - uint32_t ip_instance,
>>> - uint32_t ring,
>>> - amdgpu_semaphore_handle sem);
>>> -
>>> -/**
>>> - * wait semaphore
>>> - *
>>> - * \param context - \c [in] GPU Context
>>> - * \param ip_type - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
>>> - * \param ip_instance - \c [in] Index of the IP block of the same type
>>> - * \param ring - \c [in] Specify ring index of the IP
>>> - * \param sem - \c [in] semaphore handle
>>> - *
>>> - * \return 0 on success\n
>>> - * <0 - Negative POSIX Error code
>>> - *
>>> -*/
>>> -int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
>>> - uint32_t ip_type,
>>> - uint32_t ip_instance,
>>> - uint32_t ring,
>>> - amdgpu_semaphore_handle sem);
>>> -
>>> -/**
>>> - * destroy semaphore
>>> - *
>>> - * \param sem - \c [in] semaphore handle
>>> - *
>>> - * \return 0 on success\n
>>> - * <0 - Negative POSIX Error code
>>> - *
>>> -*/
>>> -int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
>>> -
>>> -/**
>>> * Get the ASIC marketing name
>>> *
>>> * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
>>> diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
>>> index 868eb7b..c0794d2 100644
>>> --- a/amdgpu/amdgpu_cs.c
>>> +++ b/amdgpu/amdgpu_cs.c
>>> @@ -40,9 +40,6 @@
>>> #include "amdgpu_drm.h"
>>> #include "amdgpu_internal.h"
>>>
>>> -static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
>>> -static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
>>> -
>>> /**
>>> * Create command submission context
>>> *
>>> @@ -56,7 +53,6 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
>>> {
>>> struct amdgpu_context *gpu_context;
>>> union drm_amdgpu_ctx args;
>>> - int i, j, k;
>>> int r;
>>>
>>> if (!dev || !context)
>>> @@ -68,10 +64,6 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
>>>
>>> gpu_context->dev = dev;
>>>
>>> - r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
>>> - if (r)
>>> - goto error;
>>> -
>>> /* Create the context */
>>> memset(&args, 0, sizeof(args));
>>> args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
>>> @@ -80,16 +72,11 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
>>> goto error;
>>>
>>> gpu_context->id = args.out.alloc.ctx_id;
>>> - for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
>>> - for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
>>> - for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
>>> - list_inithead(&gpu_context->sem_list[i][j][k]);
>>> *context = (amdgpu_context_handle)gpu_context;
>>>
>>> return 0;
>>>
>>> error:
>>> - pthread_mutex_destroy(&gpu_context->sequence_mutex);
>>> free(gpu_context);
>>> return r;
>>> }
>>> @@ -105,32 +92,18 @@ error:
>>> int amdgpu_cs_ctx_free(amdgpu_context_handle context)
>>> {
>>> union drm_amdgpu_ctx args;
>>> - int i, j, k;
>>> int r;
>>>
>>> if (!context)
>>> return -EINVAL;
>>>
>>> - pthread_mutex_destroy(&context->sequence_mutex);
>>> -
>>> /* now deal with kernel side */
>>> memset(&args, 0, sizeof(args));
>>> args.in.op = AMDGPU_CTX_OP_FREE_CTX;
>>> args.in.ctx_id = context->id;
>>> r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
>>> &args, sizeof(args));
>>> - for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
>>> - for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
>>> - for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
>>> - amdgpu_semaphore_handle sem;
>>> - LIST_FOR_EACH_ENTRY(sem, &context->sem_list[i][j][k], list) {
>>> - list_del(&sem->list);
>>> - amdgpu_cs_reset_sem(sem);
>>> - amdgpu_cs_unreference_sem(sem);
>>> - }
>>> - }
>>> - }
>>> - }
>>> +
>>> free(context);
>>>
>>> return r;
>>> @@ -175,10 +148,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>>> struct drm_amdgpu_cs_chunk *chunks;
>>> struct drm_amdgpu_cs_chunk_data *chunk_data;
>>> struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
>>> - struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
>>> - struct list_head *sem_list;
>>> - amdgpu_semaphore_handle sem, tmp;
>>> - uint32_t i, size, sem_count = 0;
>>> + uint32_t i, size;
>>> bool user_fence;
>>> int r = 0;
>>>
>>> @@ -194,7 +164,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>>> }
>>> user_fence = (ibs_request->fence_info.handle != NULL);
>>>
>>> - size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
>>> + size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);
>>>
>>> chunk_array = alloca(sizeof(uint64_t) * size);
>>> chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
>>> @@ -228,8 +198,6 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>>> chunk_data[i].ib_data.flags = ib->flags;
>>> }
>>>
>>> - pthread_mutex_lock(&context->sequence_mutex);
>>> -
>>> if (user_fence) {
>>> i = cs.in.num_chunks++;
>>>
>>> @@ -274,49 +242,15 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>>> chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
>>> }
>>>
>>> - sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
>>> - LIST_FOR_EACH_ENTRY(sem, sem_list, list)
>>> - sem_count++;
>>> - if (sem_count) {
>>> - sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
>>> - if (!sem_dependencies) {
>>> - r = -ENOMEM;
>>> - goto error_unlock;
>>> - }
>>> - sem_count = 0;
>>> - LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, sem_list, list) {
>>> - struct amdgpu_cs_fence *info = &sem->signal_fence;
>>> - struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
>>> - dep->ip_type = info->ip_type;
>>> - dep->ip_instance = info->ip_instance;
>>> - dep->ring = info->ring;
>>> - dep->ctx_id = info->context->id;
>>> - dep->handle = info->fence;
>>> -
>>> - list_del(&sem->list);
>>> - amdgpu_cs_reset_sem(sem);
>>> - amdgpu_cs_unreference_sem(sem);
>>> - }
>>> - i = cs.in.num_chunks++;
>>> -
>>> - /* dependencies chunk */
>>> - chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
>>> - chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
>>> - chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
>>> - chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
>>> - }
>>> -
>>> r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
>>> &cs, sizeof(cs));
>>> if (r)
>>> goto error_unlock;
>>>
>>> ibs_request->seq_no = cs.out.handle;
>>> - context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
>>> +
>>> error_unlock:
>>> - pthread_mutex_unlock(&context->sequence_mutex);
>>> free(dependencies);
>>> - free(sem_dependencies);
>>> return r;
>>> }
>>>
>>> @@ -429,170 +363,9 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
>>> fence->ip_instance, fence->ring,
>>> fence->fence, timeout_ns, flags, &busy);
>>>
>>> +
>>> if (!r && !busy)
>>> *expired = true;
>>>
>>> return r;
>>> }
>>> -
>>> -static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
>>> - uint32_t fence_count,
>>> - bool wait_all,
>>> - uint64_t timeout_ns,
>>> - uint32_t *status,
>>> - uint32_t *first)
>>> -{
>>> - struct drm_amdgpu_fence *drm_fences;
>>> - amdgpu_device_handle dev = fences[0].context->dev;
>>> - union drm_amdgpu_wait_fences args;
>>> - int r;
>>> - uint32_t i;
>>> -
>>> - drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
>>> - for (i = 0; i < fence_count; i++) {
>>> - drm_fences[i].ctx_id = fences[i].context->id;
>>> - drm_fences[i].ip_type = fences[i].ip_type;
>>> - drm_fences[i].ip_instance = fences[i].ip_instance;
>>> - drm_fences[i].ring = fences[i].ring;
>>> - drm_fences[i].seq_no = fences[i].fence;
>>> - }
>>> -
>>> - memset(&args, 0, sizeof(args));
>>> - args.in.fences = (uint64_t)(uintptr_t)drm_fences;
>>> - args.in.fence_count = fence_count;
>>> - args.in.wait_all = wait_all;
>>> - args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);
>>> -
>>> - r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &args);
>>> - if (r)
>>> - return -errno;
>>> -
>>> - *status = args.out.status;
>>> -
>>> - if (first)
>>> - *first = args.out.first_signaled;
>>> -
>>> - return 0;
>>> -}
>>> -
>>> -int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
>>> - uint32_t fence_count,
>>> - bool wait_all,
>>> - uint64_t timeout_ns,
>>> - uint32_t *status,
>>> - uint32_t *first)
>>> -{
>>> - uint32_t i;
>>> -
>>> - /* Sanity check */
>>> - if (!fences || !status || !fence_count)
>>> - return -EINVAL;
>>> -
>>> - for (i = 0; i < fence_count; i++) {
>>> - if (NULL == fences[i].context)
>>> - return -EINVAL;
>>> - if (fences[i].ip_type >= AMDGPU_HW_IP_NUM)
>>> - return -EINVAL;
>>> - if (fences[i].ring >= AMDGPU_CS_MAX_RINGS)
>>> - return -EINVAL;
>>> - }
>>> -
>>> - *status = 0;
>>> -
>>> - return amdgpu_ioctl_wait_fences(fences, fence_count, wait_all,
>>> - timeout_ns, status, first);
>>> -}
>>> -
>>> -int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
>>> -{
>>> - struct amdgpu_semaphore *gpu_semaphore;
>>> -
>>> - if (!sem)
>>> - return -EINVAL;
>>> -
>>> - gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
>>> - if (!gpu_semaphore)
>>> - return -ENOMEM;
>>> -
>>> - atomic_set(&gpu_semaphore->refcount, 1);
>>> - *sem = gpu_semaphore;
>>> -
>>> - return 0;
>>> -}
>>> -
>>> -int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
>>> - uint32_t ip_type,
>>> - uint32_t ip_instance,
>>> - uint32_t ring,
>>> - amdgpu_semaphore_handle sem)
>>> -{
>>> - if (!ctx || !sem)
>>> - return -EINVAL;
>>> - if (ip_type >= AMDGPU_HW_IP_NUM)
>>> - return -EINVAL;
>>> - if (ring >= AMDGPU_CS_MAX_RINGS)
>>> - return -EINVAL;
>>> - /* sem has been signaled */
>>> - if (sem->signal_fence.context)
>>> - return -EINVAL;
>>> - pthread_mutex_lock(&ctx->sequence_mutex);
>>> - sem->signal_fence.context = ctx;
>>> - sem->signal_fence.ip_type = ip_type;
>>> - sem->signal_fence.ip_instance = ip_instance;
>>> - sem->signal_fence.ring = ring;
>>> - sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
>>> - update_references(NULL, &sem->refcount);
>>> - pthread_mutex_unlock(&ctx->sequence_mutex);
>>> - return 0;
>>> -}
>>> -
>>> -int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
>>> - uint32_t ip_type,
>>> - uint32_t ip_instance,
>>> - uint32_t ring,
>>> - amdgpu_semaphore_handle sem)
>>> -{
>>> - if (!ctx || !sem)
>>> - return -EINVAL;
>>> - if (ip_type >= AMDGPU_HW_IP_NUM)
>>> - return -EINVAL;
>>> - if (ring >= AMDGPU_CS_MAX_RINGS)
>>> - return -EINVAL;
>>> - /* must signal first */
>>> - if (!sem->signal_fence.context)
>>> - return -EINVAL;
>>> -
>>> - pthread_mutex_lock(&ctx->sequence_mutex);
>>> - list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
>>> - pthread_mutex_unlock(&ctx->sequence_mutex);
>>> - return 0;
>>> -}
>>> -
>>> -static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
>>> -{
>>> - if (!sem || !sem->signal_fence.context)
>>> - return -EINVAL;
>>> -
>>> - sem->signal_fence.context = NULL;;
>>> - sem->signal_fence.ip_type = 0;
>>> - sem->signal_fence.ip_instance = 0;
>>> - sem->signal_fence.ring = 0;
>>> - sem->signal_fence.fence = 0;
>>> -
>>> - return 0;
>>> -}
>>> -
>>> -static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
>>> -{
>>> - if (!sem)
>>> - return -EINVAL;
>>> -
>>> - if (update_references(&sem->refcount, NULL))
>>> - free(sem);
>>> - return 0;
>>> -}
>>> -
>>> -int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
>>> -{
>>> - return amdgpu_cs_unreference_sem(sem);
>>> -}
>>> diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h
>>> index e68246b..8fb7ec6 100644
>>> --- a/amdgpu/amdgpu_internal.h
>>> +++ b/amdgpu/amdgpu_internal.h
>>> @@ -120,23 +120,8 @@ struct amdgpu_bo_list {
>>>
>>> struct amdgpu_context {
>>> struct amdgpu_device *dev;
>>> - /** Mutex for accessing fences and to maintain command submissions
>>> - in good sequence. */
>>> - pthread_mutex_t sequence_mutex;
>>> /* context id*/
>>> uint32_t id;
>>> - uint64_t last_seq[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
>>> - struct list_head sem_list[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
>>> -};
>>> -
>>> -/**
>>> - * Structure describing sw semaphore based on scheduler
>>> - *
>>> - */
>>> -struct amdgpu_semaphore {
>>> - atomic_t refcount;
>>> - struct list_head list;
>>> - struct amdgpu_cs_fence signal_fence;
>>> };
>>>
>>> /**
>>> --
>>> 2.7.4
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
More information about the amd-gfx
mailing list