[PATCH libdrm] amdgpu: revert semaphore support

Tue Jul 11 09:28:33 UTC 2017

I hoped that Dave Airlied will land it together with this patch.

As far as I know the closed source driver already doesn't use that any 
more either.

Regards,
Christian.

Am 11.07.2017 um 11:20 schrieb Mao, David:
> Hi Christian,
> When will sync object support landed in upstream kernel, which version in specific?
> We still rely on legacy semaphore implementation and we have to use it if sync object still takes time.
> Thanks.
> Best Regards,
> David
>> On 11 Jul 2017, at 5:15 PM, Christian König <deathsimple at vodafone.de> wrote:
>>
>> From: Christian König <christian.koenig at amd.com>
>>
>> This reverts commit 6b79c66b841dded6ffa6b56f14e4eb10a90a7c07
>> and commit 6afadeaf13279fcdbc48999f522e1dc90a9dfdaf.
>>
>> Semaphore support was never used by any open source project and
>> not even widely by any closed source driver.
>>
>> This should be replaced by sync object support.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> amdgpu/amdgpu.h          |  65 -------------
>> amdgpu/amdgpu_cs.c       | 237 +----------------------------------------------
>> amdgpu/amdgpu_internal.h |  15 ---
>> 3 files changed, 5 insertions(+), 312 deletions(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index 1901fa8..8bf57a4 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -128,11 +128,6 @@ typedef struct amdgpu_bo_list *amdgpu_bo_list_handle;
>>   */
>> typedef struct amdgpu_va *amdgpu_va_handle;
>>
>> -/**
>> - * Define handle for semaphore
>> - */
>> -typedef struct amdgpu_semaphore *amdgpu_semaphore_handle;
>> -
>> /*--------------------------------------------------------------------------*/
>> /* -------------------------- Structures ---------------------------------- */
>> /*--------------------------------------------------------------------------*/
>> @@ -1259,66 +1254,6 @@ int amdgpu_bo_va_op_raw(amdgpu_device_handle dev,
>> 			uint32_t ops);
>>
>> /**
>> - *  create semaphore
>> - *
>> - * \param   sem	   - \c [out] semaphore handle
>> - *
>> - * \return   0 on success\n
>> - *          <0 - Negative POSIX Error code
>> - *
>> -*/
>> -int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem);
>> -
>> -/**
>> - *  signal semaphore
>> - *
>> - * \param   context        - \c [in] GPU Context
>> - * \param   ip_type        - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
>> - * \param   ip_instance    - \c [in] Index of the IP block of the same type
>> - * \param   ring           - \c [in] Specify ring index of the IP
>> - * \param   sem	           - \c [in] semaphore handle
>> - *
>> - * \return   0 on success\n
>> - *          <0 - Negative POSIX Error code
>> - *
>> -*/
>> -int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
>> -			       uint32_t ip_type,
>> -			       uint32_t ip_instance,
>> -			       uint32_t ring,
>> -			       amdgpu_semaphore_handle sem);
>> -
>> -/**
>> - *  wait semaphore
>> - *
>> - * \param   context        - \c [in] GPU Context
>> - * \param   ip_type        - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
>> - * \param   ip_instance    - \c [in] Index of the IP block of the same type
>> - * \param   ring           - \c [in] Specify ring index of the IP
>> - * \param   sem	           - \c [in] semaphore handle
>> - *
>> - * \return   0 on success\n
>> - *          <0 - Negative POSIX Error code
>> - *
>> -*/
>> -int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
>> -			     uint32_t ip_type,
>> -			     uint32_t ip_instance,
>> -			     uint32_t ring,
>> -			     amdgpu_semaphore_handle sem);
>> -
>> -/**
>> - *  destroy semaphore
>> - *
>> - * \param   sem	    - \c [in] semaphore handle
>> - *
>> - * \return   0 on success\n
>> - *          <0 - Negative POSIX Error code
>> - *
>> -*/
>> -int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
>> -
>> -/**
>>   *  Get the ASIC marketing name
>>   *
>>   * \param   dev         - \c [in] Device handle. See #amdgpu_device_initialize()
>> diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
>> index 868eb7b..c0794d2 100644
>> --- a/amdgpu/amdgpu_cs.c
>> +++ b/amdgpu/amdgpu_cs.c
>> @@ -40,9 +40,6 @@
>> #include "amdgpu_drm.h"
>> #include "amdgpu_internal.h"
>>
>> -static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
>> -static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
>> -
>> /**
>>   * Create command submission context
>>   *
>> @@ -56,7 +53,6 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
>> {
>> 	struct amdgpu_context *gpu_context;
>> 	union drm_amdgpu_ctx args;
>> -	int i, j, k;
>> 	int r;
>>
>> 	if (!dev || !context)
>> @@ -68,10 +64,6 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
>>
>> 	gpu_context->dev = dev;
>>
>> -	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
>> -	if (r)
>> -		goto error;
>> -
>> 	/* Create the context */
>> 	memset(&args, 0, sizeof(args));
>> 	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
>> @@ -80,16 +72,11 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
>> 		goto error;
>>
>> 	gpu_context->id = args.out.alloc.ctx_id;
>> -	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
>> -		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
>> -			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
>> -				list_inithead(&gpu_context->sem_list[i][j][k]);
>> 	*context = (amdgpu_context_handle)gpu_context;
>>
>> 	return 0;
>>
>> error:
>> -	pthread_mutex_destroy(&gpu_context->sequence_mutex);
>> 	free(gpu_context);
>> 	return r;
>> }
>> @@ -105,32 +92,18 @@ error:
>> int amdgpu_cs_ctx_free(amdgpu_context_handle context)
>> {
>> 	union drm_amdgpu_ctx args;
>> -	int i, j, k;
>> 	int r;
>>
>> 	if (!context)
>> 		return -EINVAL;
>>
>> -	pthread_mutex_destroy(&context->sequence_mutex);
>> -
>> 	/* now deal with kernel side */
>> 	memset(&args, 0, sizeof(args));
>> 	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
>> 	args.in.ctx_id = context->id;
>> 	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
>> 				&args, sizeof(args));
>> -	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
>> -		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
>> -			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
>> -				amdgpu_semaphore_handle sem;
>> -				LIST_FOR_EACH_ENTRY(sem, &context->sem_list[i][j][k], list) {
>> -					list_del(&sem->list);
>> -					amdgpu_cs_reset_sem(sem);
>> -					amdgpu_cs_unreference_sem(sem);
>> -				}
>> -			}
>> -		}
>> -	}
>> +
>> 	free(context);
>>
>> 	return r;
>> @@ -175,10 +148,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>> 	struct drm_amdgpu_cs_chunk *chunks;
>> 	struct drm_amdgpu_cs_chunk_data *chunk_data;
>> 	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
>> -	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
>> -	struct list_head *sem_list;
>> -	amdgpu_semaphore_handle sem, tmp;
>> -	uint32_t i, size, sem_count = 0;
>> +	uint32_t i, size;
>> 	bool user_fence;
>> 	int r = 0;
>>
>> @@ -194,7 +164,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>> 	}
>> 	user_fence = (ibs_request->fence_info.handle != NULL);
>>
>> -	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
>> +	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);
>>
>> 	chunk_array = alloca(sizeof(uint64_t) * size);
>> 	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
>> @@ -228,8 +198,6 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>> 		chunk_data[i].ib_data.flags = ib->flags;
>> 	}
>>
>> -	pthread_mutex_lock(&context->sequence_mutex);
>> -
>> 	if (user_fence) {
>> 		i = cs.in.num_chunks++;
>>
>> @@ -274,49 +242,15 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>> 		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
>> 	}
>>
>> -	sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
>> -	LIST_FOR_EACH_ENTRY(sem, sem_list, list)
>> -		sem_count++;
>> -	if (sem_count) {
>> -		sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
>> -		if (!sem_dependencies) {
>> -			r = -ENOMEM;
>> -			goto error_unlock;
>> -		}
>> -		sem_count = 0;
>> -		LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, sem_list, list) {
>> -			struct amdgpu_cs_fence *info = &sem->signal_fence;
>> -			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
>> -			dep->ip_type = info->ip_type;
>> -			dep->ip_instance = info->ip_instance;
>> -			dep->ring = info->ring;
>> -			dep->ctx_id = info->context->id;
>> -			dep->handle = info->fence;
>> -
>> -			list_del(&sem->list);
>> -			amdgpu_cs_reset_sem(sem);
>> -			amdgpu_cs_unreference_sem(sem);
>> -		}
>> -		i = cs.in.num_chunks++;
>> -
>> -		/* dependencies chunk */
>> -		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
>> -		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
>> -		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
>> -		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
>> -	}
>> -
>> 	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
>> 				&cs, sizeof(cs));
>> 	if (r)
>> 		goto error_unlock;
>>
>> 	ibs_request->seq_no = cs.out.handle;
>> -	context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
>> +
>> error_unlock:
>> -	pthread_mutex_unlock(&context->sequence_mutex);
>> 	free(dependencies);
>> -	free(sem_dependencies);
>> 	return r;
>> }
>>
>> @@ -429,170 +363,9 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
>> 				fence->ip_instance, fence->ring,
>> 			       	fence->fence, timeout_ns, flags, &busy);
>>
>> +
>> 	if (!r && !busy)
>> 		*expired = true;
>>
>> 	return r;
>> }
>> -
>> -static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
>> -				    uint32_t fence_count,
>> -				    bool wait_all,
>> -				    uint64_t timeout_ns,
>> -				    uint32_t *status,
>> -				    uint32_t *first)
>> -{
>> -	struct drm_amdgpu_fence *drm_fences;
>> -	amdgpu_device_handle dev = fences[0].context->dev;
>> -	union drm_amdgpu_wait_fences args;
>> -	int r;
>> -	uint32_t i;
>> -
>> -	drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
>> -	for (i = 0; i < fence_count; i++) {
>> -		drm_fences[i].ctx_id = fences[i].context->id;
>> -		drm_fences[i].ip_type = fences[i].ip_type;
>> -		drm_fences[i].ip_instance = fences[i].ip_instance;
>> -		drm_fences[i].ring = fences[i].ring;
>> -		drm_fences[i].seq_no = fences[i].fence;
>> -	}
>> -
>> -	memset(&args, 0, sizeof(args));
>> -	args.in.fences = (uint64_t)(uintptr_t)drm_fences;
>> -	args.in.fence_count = fence_count;
>> -	args.in.wait_all = wait_all;
>> -	args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);
>> -
>> -	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &args);
>> -	if (r)
>> -		return -errno;
>> -
>> -	*status = args.out.status;
>> -
>> -	if (first)
>> -		*first = args.out.first_signaled;
>> -
>> -	return 0;
>> -}
>> -
>> -int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
>> -			  uint32_t fence_count,
>> -			  bool wait_all,
>> -			  uint64_t timeout_ns,
>> -			  uint32_t *status,
>> -			  uint32_t *first)
>> -{
>> -	uint32_t i;
>> -
>> -	/* Sanity check */
>> -	if (!fences || !status || !fence_count)
>> -		return -EINVAL;
>> -
>> -	for (i = 0; i < fence_count; i++) {
>> -		if (NULL == fences[i].context)
>> -			return -EINVAL;
>> -		if (fences[i].ip_type >= AMDGPU_HW_IP_NUM)
>> -			return -EINVAL;
>> -		if (fences[i].ring >= AMDGPU_CS_MAX_RINGS)
>> -			return -EINVAL;
>> -	}
>> -
>> -	*status = 0;
>> -
>> -	return amdgpu_ioctl_wait_fences(fences, fence_count, wait_all,
>> -					timeout_ns, status, first);
>> -}
>> -
>> -int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
>> -{
>> -	struct amdgpu_semaphore *gpu_semaphore;
>> -
>> -	if (!sem)
>> -		return -EINVAL;
>> -
>> -	gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
>> -	if (!gpu_semaphore)
>> -		return -ENOMEM;
>> -
>> -	atomic_set(&gpu_semaphore->refcount, 1);
>> -	*sem = gpu_semaphore;
>> -
>> -	return 0;
>> -}
>> -
>> -int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
>> -			       uint32_t ip_type,
>> -			       uint32_t ip_instance,
>> -			       uint32_t ring,
>> -			       amdgpu_semaphore_handle sem)
>> -{
>> -	if (!ctx || !sem)
>> -		return -EINVAL;
>> -	if (ip_type >= AMDGPU_HW_IP_NUM)
>> -		return -EINVAL;
>> -	if (ring >= AMDGPU_CS_MAX_RINGS)
>> -		return -EINVAL;
>> -	/* sem has been signaled */
>> -	if (sem->signal_fence.context)
>> -		return -EINVAL;
>> -	pthread_mutex_lock(&ctx->sequence_mutex);
>> -	sem->signal_fence.context = ctx;
>> -	sem->signal_fence.ip_type = ip_type;
>> -	sem->signal_fence.ip_instance = ip_instance;
>> -	sem->signal_fence.ring = ring;
>> -	sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
>> -	update_references(NULL, &sem->refcount);
>> -	pthread_mutex_unlock(&ctx->sequence_mutex);
>> -	return 0;
>> -}
>> -
>> -int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
>> -			     uint32_t ip_type,
>> -			     uint32_t ip_instance,
>> -			     uint32_t ring,
>> -			     amdgpu_semaphore_handle sem)
>> -{
>> -	if (!ctx || !sem)
>> -		return -EINVAL;
>> -	if (ip_type >= AMDGPU_HW_IP_NUM)
>> -		return -EINVAL;
>> -	if (ring >= AMDGPU_CS_MAX_RINGS)
>> -		return -EINVAL;
>> -	/* must signal first */
>> -	if (!sem->signal_fence.context)
>> -		return -EINVAL;
>> -
>> -	pthread_mutex_lock(&ctx->sequence_mutex);
>> -	list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
>> -	pthread_mutex_unlock(&ctx->sequence_mutex);
>> -	return 0;
>> -}
>> -
>> -static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
>> -{
>> -	if (!sem || !sem->signal_fence.context)
>> -		return -EINVAL;
>> -
>> -	sem->signal_fence.context = NULL;;
>> -	sem->signal_fence.ip_type = 0;
>> -	sem->signal_fence.ip_instance = 0;
>> -	sem->signal_fence.ring = 0;
>> -	sem->signal_fence.fence = 0;
>> -
>> -	return 0;
>> -}
>> -
>> -static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
>> -{
>> -	if (!sem)
>> -		return -EINVAL;
>> -
>> -	if (update_references(&sem->refcount, NULL))
>> -		free(sem);
>> -	return 0;
>> -}
>> -
>> -int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
>> -{
>> -	return amdgpu_cs_unreference_sem(sem);
>> -}
>> diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h
>> index e68246b..8fb7ec6 100644
>> --- a/amdgpu/amdgpu_internal.h
>> +++ b/amdgpu/amdgpu_internal.h
>> @@ -120,23 +120,8 @@ struct amdgpu_bo_list {
>>
>> struct amdgpu_context {
>> 	struct amdgpu_device *dev;
>> -	/** Mutex for accessing fences and to maintain command submissions
>> -	    in good sequence. */
>> -	pthread_mutex_t sequence_mutex;
>> 	/* context id*/
>> 	uint32_t id;
>> -	uint64_t last_seq[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
>> -	struct list_head sem_list[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
>> -};
>> -
>> -/**
>> - * Structure describing sw semaphore based on scheduler
>> - *
>> - */
>> -struct amdgpu_semaphore {
>> -	atomic_t refcount;
>> -	struct list_head list;
>> -	struct amdgpu_cs_fence signal_fence;
>> };
>>
>> /**
>> -- 
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx