[PATCH 1/1] drm/amdkfd: Improve concurrency of event handling

Thu Mar 3 07:25:16 UTC 2022

Am 02.03.22 um 21:06 schrieb Felix Kuehling:
> Use rcu_read_lock to read p->event_idr concurrently with other readers
> and writers. Use p->event_mutex only for creating and destroying events
> and in kfd_wait_on_events.

That might not necessarily work as you expect. rcu_read_lock() does not 
provide barriers for the CPU cache.

> Protect the contents of the kfd_event structure with a per-event
> spinlock that can be taken inside the rcu_read_lock critical section.

That helps to prevent problems for the ev structure, but it doesn't 
protect the idr itself.

BTW: Why are you using an idr in the first place? I don't see the lookup 
used anywhere.

Regards,
Christian.

>
> This eliminates contention of p->event_mutex in set_event, which tends
> to be on the critical path for dispatch latency even when busy waiting
> is used. It also eliminates lock contention in event interrupt handlers.
> Since the p->event_mutex is now used much less, the impact of requiring
> it in kfd_wait_on_events should also be much smaller.
>
> This should improve event handling latency for processes using multiple
> GPUs concurrently.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_events.c | 119 +++++++++++++++---------
>   drivers/gpu/drm/amd/amdkfd/kfd_events.h |   1 +
>   2 files changed, 78 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index deecccebe5b6..1726306715b2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -128,8 +128,8 @@ static int allocate_event_notification_slot(struct kfd_process *p,
>   }
>   
>   /*
> - * Assumes that p->event_mutex is held and of course that p is not going
> - * away (current or locked).
> + * Assumes that p->event_mutex or rcu_readlock is held and of course that p is
> + * not going away.
>    */
>   static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
>   {
> @@ -251,15 +251,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
>   	struct kfd_event_waiter *waiter;
>   
>   	/* Wake up pending waiters. They will return failure */
> +	spin_lock(&ev->lock);
>   	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
> -		waiter->event = NULL;
> +		WRITE_ONCE(waiter->event, NULL);
>   	wake_up_all(&ev->wq);
> +	spin_unlock(&ev->lock);
>   
>   	if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
>   	    ev->type == KFD_EVENT_TYPE_DEBUG)
>   		p->signal_event_count--;
>   
>   	idr_remove(&p->event_idr, ev->event_id);
> +	synchronize_rcu();
>   	kfree(ev);
>   }
>   
> @@ -392,6 +395,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>   	ev->auto_reset = auto_reset;
>   	ev->signaled = false;
>   
> +	spin_lock_init(&ev->lock);
>   	init_waitqueue_head(&ev->wq);
>   
>   	*event_page_offset = 0;
> @@ -466,6 +470,7 @@ int kfd_criu_restore_event(struct file *devkfd,
>   	ev->auto_reset = ev_priv->auto_reset;
>   	ev->signaled = ev_priv->signaled;
>   
> +	spin_lock_init(&ev->lock);
>   	init_waitqueue_head(&ev->wq);
>   
>   	mutex_lock(&p->event_mutex);
> @@ -609,13 +614,13 @@ static void set_event(struct kfd_event *ev)
>   
>   	/* Auto reset if the list is non-empty and we're waking
>   	 * someone. waitqueue_active is safe here because we're
> -	 * protected by the p->event_mutex, which is also held when
> +	 * protected by the ev->lock, which is also held when
>   	 * updating the wait queues in kfd_wait_on_events.
>   	 */
>   	ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
>   
>   	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
> -		waiter->activated = true;
> +		WRITE_ONCE(waiter->activated, true);
>   
>   	wake_up_all(&ev->wq);
>   }
> @@ -626,16 +631,18 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
>   	int ret = 0;
>   	struct kfd_event *ev;
>   
> -	mutex_lock(&p->event_mutex);
> +	rcu_read_lock();
>   
>   	ev = lookup_event_by_id(p, event_id);
> +	spin_lock(&ev->lock);
>   
>   	if (ev && event_can_be_cpu_signaled(ev))
>   		set_event(ev);
>   	else
>   		ret = -EINVAL;
>   
> -	mutex_unlock(&p->event_mutex);
> +	spin_unlock(&ev->lock);
> +	rcu_read_unlock();
>   	return ret;
>   }
>   
> @@ -650,23 +657,25 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
>   	int ret = 0;
>   	struct kfd_event *ev;
>   
> -	mutex_lock(&p->event_mutex);
> +	rcu_read_lock();
>   
>   	ev = lookup_event_by_id(p, event_id);
> +	spin_lock(&ev->lock);
>   
>   	if (ev && event_can_be_cpu_signaled(ev))
>   		reset_event(ev);
>   	else
>   		ret = -EINVAL;
>   
> -	mutex_unlock(&p->event_mutex);
> +	spin_unlock(&ev->lock);
> +	rcu_read_unlock();
>   	return ret;
>   
>   }
>   
>   static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
>   {
> -	page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
> +	WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
>   }
>   
>   static void set_event_from_interrupt(struct kfd_process *p,
> @@ -674,7 +683,9 @@ static void set_event_from_interrupt(struct kfd_process *p,
>   {
>   	if (ev && event_can_be_gpu_signaled(ev)) {
>   		acknowledge_signal(p, ev);
> +		spin_lock(&ev->lock);
>   		set_event(ev);
> +		spin_unlock(&ev->lock);
>   	}
>   }
>   
> @@ -693,7 +704,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
>   	if (!p)
>   		return; /* Presumably process exited. */
>   
> -	mutex_lock(&p->event_mutex);
> +	rcu_read_lock();
>   
>   	if (valid_id_bits)
>   		ev = lookup_signaled_event_by_partial_id(p, partial_id,
> @@ -721,7 +732,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
>   				if (id >= KFD_SIGNAL_EVENT_LIMIT)
>   					break;
>   
> -				if (slots[id] != UNSIGNALED_EVENT_SLOT)
> +				if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
>   					set_event_from_interrupt(p, ev);
>   			}
>   		} else {
> @@ -730,14 +741,14 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
>   			 * only signaled events from the IDR.
>   			 */
>   			for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
> -				if (slots[id] != UNSIGNALED_EVENT_SLOT) {
> +				if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
>   					ev = lookup_event_by_id(p, id);
>   					set_event_from_interrupt(p, ev);
>   				}
>   		}
>   	}
>   
> -	mutex_unlock(&p->event_mutex);
> +	rcu_read_unlock();
>   	kfd_unref_process(p);
>   }
>   
> @@ -767,9 +778,11 @@ static int init_event_waiter_get_status(struct kfd_process *p,
>   	if (!ev)
>   		return -EINVAL;
>   
> +	spin_lock(&ev->lock);
>   	waiter->event = ev;
>   	waiter->activated = ev->signaled;
>   	ev->signaled = ev->signaled && !ev->auto_reset;
> +	spin_unlock(&ev->lock);
>   
>   	return 0;
>   }
> @@ -781,8 +794,11 @@ static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
>   	/* Only add to the wait list if we actually need to
>   	 * wait on this event.
>   	 */
> -	if (!waiter->activated)
> +	if (!waiter->activated) {
> +		spin_lock(&ev->lock);
>   		add_wait_queue(&ev->wq, &waiter->wait);
> +		spin_unlock(&ev->lock);
> +	}
>   }
>   
>   /* test_event_condition - Test condition of events being waited for
> @@ -802,10 +818,10 @@ static uint32_t test_event_condition(bool all, uint32_t num_events,
>   	uint32_t activated_count = 0;
>   
>   	for (i = 0; i < num_events; i++) {
> -		if (!event_waiters[i].event)
> +		if (!READ_ONCE(event_waiters[i].event))
>   			return KFD_IOC_WAIT_RESULT_FAIL;
>   
> -		if (event_waiters[i].activated) {
> +		if (READ_ONCE(event_waiters[i].activated)) {
>   			if (!all)
>   				return KFD_IOC_WAIT_RESULT_COMPLETE;
>   
> @@ -834,6 +850,8 @@ static int copy_signaled_event_data(uint32_t num_events,
>   	for (i = 0; i < num_events; i++) {
>   		waiter = &event_waiters[i];
>   		event = waiter->event;
> +		if (!event)
> +			return -EINVAL; /* event was destroyed */
>   		if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
>   			dst = &data[i].memory_exception_data;
>   			src = &event->memory_exception_data;
> @@ -844,11 +862,8 @@ static int copy_signaled_event_data(uint32_t num_events,
>   	}
>   
>   	return 0;
> -
>   }
>   
> -
> -
>   static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
>   {
>   	if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
> @@ -872,9 +887,12 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
>   	uint32_t i;
>   
>   	for (i = 0; i < num_events; i++)
> -		if (waiters[i].event)
> +		if (waiters[i].event) {
> +			spin_lock(&waiters[i].event->lock);
>   			remove_wait_queue(&waiters[i].event->wq,
>   					  &waiters[i].wait);
> +			spin_unlock(&waiters[i].event->lock);
> +		}
>   
>   	kfree(waiters);
>   }
> @@ -898,6 +916,9 @@ int kfd_wait_on_events(struct kfd_process *p,
>   		goto out;
>   	}
>   
> +	/* Use p->event_mutex here to protect against concurrent creation and
> +	 * destruction of events while we initialize event_waiters.
> +	 */
>   	mutex_lock(&p->event_mutex);
>   
>   	for (i = 0; i < num_events; i++) {
> @@ -976,14 +997,19 @@ int kfd_wait_on_events(struct kfd_process *p,
>   	}
>   	__set_current_state(TASK_RUNNING);
>   
> +	mutex_lock(&p->event_mutex);
>   	/* copy_signaled_event_data may sleep. So this has to happen
>   	 * after the task state is set back to RUNNING.
> +	 *
> +	 * The event may also have been destroyed after signaling. So
> +	 * copy_signaled_event_data also must confirm that the event
> +	 * still exists. Therefore this must be under the p->event_mutex
> +	 * which is also held when events are destroyed.
>   	 */
>   	if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
>   		ret = copy_signaled_event_data(num_events,
>   					       event_waiters, events);
>   
> -	mutex_lock(&p->event_mutex);
>   out_unlock:
>   	free_waiters(num_events, event_waiters);
>   	mutex_unlock(&p->event_mutex);
> @@ -1042,8 +1068,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
>   }
>   
>   /*
> - * Assumes that p->event_mutex is held and of course
> - * that p is not going away (current or locked).
> + * Assumes that p is not going away.
>    */
>   static void lookup_events_by_type_and_signal(struct kfd_process *p,
>   		int type, void *event_data)
> @@ -1055,6 +1080,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
>   
>   	ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
>   
> +	rcu_read_lock();
> +
>   	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>   	idr_for_each_entry_continue(&p->event_idr, ev, id)
>   		if (ev->type == type) {
> @@ -1062,9 +1089,11 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
>   			dev_dbg(kfd_device,
>   					"Event found: id %X type %d",
>   					ev->event_id, ev->type);
> +			spin_lock(&ev->lock);
>   			set_event(ev);
>   			if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
>   				ev->memory_exception_data = *ev_data;
> +			spin_unlock(&ev->lock);
>   		}
>   
>   	if (type == KFD_EVENT_TYPE_MEMORY) {
> @@ -1087,6 +1116,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
>   				p->lead_thread->pid, p->pasid);
>   		}
>   	}
> +
> +	rcu_read_unlock();
>   }
>   
>   #ifdef KFD_SUPPORT_IOMMU_V2
> @@ -1162,16 +1193,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
>   
>   	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
>   	    KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
> -	    KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) {
> -		mutex_lock(&p->event_mutex);
> -
> -		/* Lookup events by type and signal them */
> +	    KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
>   		lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
>   				&memory_exception_data);
>   
> -		mutex_unlock(&p->event_mutex);
> -	}
> -
>   	kfd_unref_process(p);
>   }
>   #endif /* KFD_SUPPORT_IOMMU_V2 */
> @@ -1188,12 +1213,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
>   	if (!p)
>   		return; /* Presumably process exited. */
>   
> -	mutex_lock(&p->event_mutex);
> -
> -	/* Lookup events by type and signal them */
>   	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
> -
> -	mutex_unlock(&p->event_mutex);
>   	kfd_unref_process(p);
>   }
>   
> @@ -1229,16 +1249,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
>   			info->prot_write ? 1 : 0;
>   		memory_exception_data.failure.imprecise = 0;
>   	}
> -	mutex_lock(&p->event_mutex);
> +
> +	rcu_read_lock();
>   
>   	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>   	idr_for_each_entry_continue(&p->event_idr, ev, id)
>   		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
> +			spin_lock(&ev->lock);
>   			ev->memory_exception_data = memory_exception_data;
>   			set_event(ev);
> +			spin_unlock(&ev->lock);
>   		}
>   
> -	mutex_unlock(&p->event_mutex);
> +	rcu_read_unlock();
>   	kfd_unref_process(p);
>   }
>   
> @@ -1272,22 +1295,28 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
>   			continue;
>   		}
>   
> -		mutex_lock(&p->event_mutex);
> +		rcu_read_lock();
> +
>   		id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>   		idr_for_each_entry_continue(&p->event_idr, ev, id) {
>   			if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
> +				spin_lock(&ev->lock);
>   				ev->hw_exception_data = hw_exception_data;
>   				ev->hw_exception_data.gpu_id = user_gpu_id;
>   				set_event(ev);
> +				spin_unlock(&ev->lock);
>   			}
>   			if (ev->type == KFD_EVENT_TYPE_MEMORY &&
>   			    reset_cause == KFD_HW_EXCEPTION_ECC) {
> +				spin_lock(&ev->lock);
>   				ev->memory_exception_data = memory_exception_data;
>   				ev->memory_exception_data.gpu_id = user_gpu_id;
>   				set_event(ev);
> +				spin_unlock(&ev->lock);
>   			}
>   		}
> -		mutex_unlock(&p->event_mutex);
> +
> +		rcu_read_unlock();
>   	}
>   	srcu_read_unlock(&kfd_processes_srcu, idx);
>   }
> @@ -1320,19 +1349,25 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
>   	memory_exception_data.gpu_id = user_gpu_id;
>   	memory_exception_data.failure.imprecise = true;
>   
> -	mutex_lock(&p->event_mutex);
> +	rcu_read_lock();
> +
>   	idr_for_each_entry_continue(&p->event_idr, ev, id) {
>   		if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
> +			spin_lock(&ev->lock);
>   			ev->hw_exception_data = hw_exception_data;
>   			set_event(ev);
> +			spin_unlock(&ev->lock);
>   		}
>   
>   		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
> +			spin_lock(&ev->lock);
>   			ev->memory_exception_data = memory_exception_data;
>   			set_event(ev);
> +			spin_unlock(&ev->lock);
>   		}
>   	}
> -	mutex_unlock(&p->event_mutex);
> +
> +	rcu_read_unlock();
>   
>   	/* user application will handle SIGBUS signal */
>   	send_sig(SIGBUS, p->lead_thread, 0);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> index 1238af11916e..55d376f56021 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> @@ -59,6 +59,7 @@ struct kfd_event {
>   
>   	int type;
>   
> +	spinlock_t lock;
>   	wait_queue_head_t wq; /* List of event waiters. */
>   
>   	/* Only for signal events. */