[PATCH 1/1] drm/amdkfd: Improve concurrency of event handling
Felix Kuehling
felix.kuehling at amd.com
Thu Mar 3 15:31:55 UTC 2022
On 2022-03-03 at 02:25, Christian König wrote:
> On 02.03.22 at 21:06, Felix Kuehling wrote:
>> Use rcu_read_lock to read p->event_idr concurrently with other readers
>> and writers. Use p->event_mutex only for creating and destroying events
>> and in kfd_wait_on_events.
>
> That might not necessarily work as you expect. rcu_read_lock() does
> not provide barriers for the CPU cache.
>
>> Protect the contents of the kfd_event structure with a per-event
>> spinlock that can be taken inside the rcu_read_lock critical section.
>
> That helps prevent problems with the ev structure, but it doesn't
> protect the idr itself.
idr_find can be used safely under rcu_read_lock according to this
comment in idr.h:
* idr_find() is able to be called locklessly, using RCU. The caller must
* ensure calls to this function are made within rcu_read_lock() regions.
* Other readers (lock-free or otherwise) and modifications may be running
* concurrently.
*
* It is still required that the caller manage the synchronization and
* lifetimes of the items. So if RCU lock-free lookups are used, typically
* this would mean that the items have their own locks, or are amenable to
* lock-free access; and that the items are freed by RCU (or only freed after
* having been deleted from the idr tree *and* a synchronize_rcu() grace
* period).
This RCU-safe behaviour was introduced by commit f9c46d6ea5ce ("idr: make idr_find rcu-safe") in 2008.
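In other words, the pattern the patch relies on is roughly the following. This is a simplified sketch with amdkfd-like names, not the literal driver code; error handling and the waitqueue are omitted:

    #include <linux/idr.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct kfd_event {
            spinlock_t lock;        /* protects the event contents */
            bool signaled;
            /* ... */
    };

    /* Reader: lockless idr_find() inside an RCU read-side section,
     * per-event spinlock for the event contents.
     */
    static void signal_event_rcu(struct idr *event_idr, unsigned long id)
    {
            struct kfd_event *ev;

            rcu_read_lock();
            ev = idr_find(event_idr, id);
            if (ev) {
                    spin_lock(&ev->lock);
                    ev->signaled = true;
                    spin_unlock(&ev->lock);
            }
            rcu_read_unlock();
    }

    /* Writer: remove the event from the IDR first, then wait for a
     * grace period before freeing, so concurrent lockless readers
     * can never see a freed event.
     */
    static void destroy_event_rcu(struct idr *event_idr,
                                  struct kfd_event *ev, unsigned long id)
    {
            idr_remove(event_idr, id);
            synchronize_rcu();
            kfree(ev);
    }

destroy_event in the patch follows the same idr_remove + synchronize_rcu() ordering before kfree, which is what keeps the lockless lookups safe.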
>
> BTW: Why are you using an idr in the first place? I don't see the
> lookup used anywhere.
lookup_event_by_id is just a wrapper around idr_find; it's used in
kfd_event_destroy, kfd_set_event, kfd_reset_event and
kfd_wait_on_events. lookup_signaled_event_by_partial_id also uses
idr_find and is called from kfd_signal_event_interrupt.
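For reference, the lookup wrapper is essentially just this (paraphrased; the real function in kfd_events.c may differ slightly):

    static struct kfd_event *lookup_event_by_id(struct kfd_process *p,
                                                uint32_t id)
    {
            return idr_find(&p->event_idr, id);
    }

So the only real question is whether idr_find itself is safe under rcu_read_lock, which the idr.h comment above answers.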
Regards,
Felix
>
> Regards,
> Christian.
>
>>
>> This eliminates contention of p->event_mutex in set_event, which tends
>> to be on the critical path for dispatch latency even when busy waiting
>> is used. It also eliminates lock contention in event interrupt handlers.
>> Since the p->event_mutex is now used much less, the impact of requiring
>> it in kfd_wait_on_events should also be much smaller.
>>
>> This should improve event handling latency for processes using multiple
>> GPUs concurrently.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_events.c | 119 +++++++++++++++---------
>> drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 +
>> 2 files changed, 78 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> index deecccebe5b6..1726306715b2 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> @@ -128,8 +128,8 @@ static int
>> allocate_event_notification_slot(struct kfd_process *p,
>> }
>> /*
>> - * Assumes that p->event_mutex is held and of course that p is not
>> going
>> - * away (current or locked).
>> + * Assumes that p->event_mutex or rcu_read_lock is held and of course
>> that p is
>> + * not going away.
>> */
>> static struct kfd_event *lookup_event_by_id(struct kfd_process *p,
>> uint32_t id)
>> {
>> @@ -251,15 +251,18 @@ static void destroy_event(struct kfd_process
>> *p, struct kfd_event *ev)
>> struct kfd_event_waiter *waiter;
>> /* Wake up pending waiters. They will return failure */
>> + spin_lock(&ev->lock);
>> list_for_each_entry(waiter, &ev->wq.head, wait.entry)
>> - waiter->event = NULL;
>> + WRITE_ONCE(waiter->event, NULL);
>> wake_up_all(&ev->wq);
>> + spin_unlock(&ev->lock);
>> if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
>> ev->type == KFD_EVENT_TYPE_DEBUG)
>> p->signal_event_count--;
>> idr_remove(&p->event_idr, ev->event_id);
>> + synchronize_rcu();
>> kfree(ev);
>> }
>> @@ -392,6 +395,7 @@ int kfd_event_create(struct file *devkfd,
>> struct kfd_process *p,
>> ev->auto_reset = auto_reset;
>> ev->signaled = false;
>> + spin_lock_init(&ev->lock);
>> init_waitqueue_head(&ev->wq);
>> *event_page_offset = 0;
>> @@ -466,6 +470,7 @@ int kfd_criu_restore_event(struct file *devkfd,
>> ev->auto_reset = ev_priv->auto_reset;
>> ev->signaled = ev_priv->signaled;
>> + spin_lock_init(&ev->lock);
>> init_waitqueue_head(&ev->wq);
>> mutex_lock(&p->event_mutex);
>> @@ -609,13 +614,13 @@ static void set_event(struct kfd_event *ev)
>> /* Auto reset if the list is non-empty and we're waking
>> * someone. waitqueue_active is safe here because we're
>> - * protected by the p->event_mutex, which is also held when
>> + * protected by the ev->lock, which is also held when
>> * updating the wait queues in kfd_wait_on_events.
>> */
>> ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
>> list_for_each_entry(waiter, &ev->wq.head, wait.entry)
>> - waiter->activated = true;
>> + WRITE_ONCE(waiter->activated, true);
>> wake_up_all(&ev->wq);
>> }
>> @@ -626,16 +631,18 @@ int kfd_set_event(struct kfd_process *p,
>> uint32_t event_id)
>> int ret = 0;
>> struct kfd_event *ev;
>> - mutex_lock(&p->event_mutex);
>> + rcu_read_lock();
>> ev = lookup_event_by_id(p, event_id);
>> + spin_lock(&ev->lock);
>> if (ev && event_can_be_cpu_signaled(ev))
>> set_event(ev);
>> else
>> ret = -EINVAL;
>> - mutex_unlock(&p->event_mutex);
>> + spin_unlock(&ev->lock);
>> + rcu_read_unlock();
>> return ret;
>> }
>> @@ -650,23 +657,25 @@ int kfd_reset_event(struct kfd_process *p,
>> uint32_t event_id)
>> int ret = 0;
>> struct kfd_event *ev;
>> - mutex_lock(&p->event_mutex);
>> + rcu_read_lock();
>> ev = lookup_event_by_id(p, event_id);
>> + spin_lock(&ev->lock);
>> if (ev && event_can_be_cpu_signaled(ev))
>> reset_event(ev);
>> else
>> ret = -EINVAL;
>> - mutex_unlock(&p->event_mutex);
>> + spin_unlock(&ev->lock);
>> + rcu_read_unlock();
>> return ret;
>> }
>> static void acknowledge_signal(struct kfd_process *p, struct
>> kfd_event *ev)
>> {
>> - page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
>> + WRITE_ONCE(page_slots(p->signal_page)[ev->event_id],
>> UNSIGNALED_EVENT_SLOT);
>> }
>> static void set_event_from_interrupt(struct kfd_process *p,
>> @@ -674,7 +683,9 @@ static void set_event_from_interrupt(struct
>> kfd_process *p,
>> {
>> if (ev && event_can_be_gpu_signaled(ev)) {
>> acknowledge_signal(p, ev);
>> + spin_lock(&ev->lock);
>> set_event(ev);
>> + spin_unlock(&ev->lock);
>> }
>> }
>> @@ -693,7 +704,7 @@ void kfd_signal_event_interrupt(u32 pasid,
>> uint32_t partial_id,
>> if (!p)
>> return; /* Presumably process exited. */
>> - mutex_lock(&p->event_mutex);
>> + rcu_read_lock();
>> if (valid_id_bits)
>> ev = lookup_signaled_event_by_partial_id(p, partial_id,
>> @@ -721,7 +732,7 @@ void kfd_signal_event_interrupt(u32 pasid,
>> uint32_t partial_id,
>> if (id >= KFD_SIGNAL_EVENT_LIMIT)
>> break;
>> - if (slots[id] != UNSIGNALED_EVENT_SLOT)
>> + if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
>> set_event_from_interrupt(p, ev);
>> }
>> } else {
>> @@ -730,14 +741,14 @@ void kfd_signal_event_interrupt(u32 pasid,
>> uint32_t partial_id,
>> * only signaled events from the IDR.
>> */
>> for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
>> - if (slots[id] != UNSIGNALED_EVENT_SLOT) {
>> + if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
>> ev = lookup_event_by_id(p, id);
>> set_event_from_interrupt(p, ev);
>> }
>> }
>> }
>> - mutex_unlock(&p->event_mutex);
>> + rcu_read_unlock();
>> kfd_unref_process(p);
>> }
>> @@ -767,9 +778,11 @@ static int init_event_waiter_get_status(struct
>> kfd_process *p,
>> if (!ev)
>> return -EINVAL;
>> + spin_lock(&ev->lock);
>> waiter->event = ev;
>> waiter->activated = ev->signaled;
>> ev->signaled = ev->signaled && !ev->auto_reset;
>> + spin_unlock(&ev->lock);
>> return 0;
>> }
>> @@ -781,8 +794,11 @@ static void
>> init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
>> /* Only add to the wait list if we actually need to
>> * wait on this event.
>> */
>> - if (!waiter->activated)
>> + if (!waiter->activated) {
>> + spin_lock(&ev->lock);
>> add_wait_queue(&ev->wq, &waiter->wait);
>> + spin_unlock(&ev->lock);
>> + }
>> }
>> /* test_event_condition - Test condition of events being waited for
>> @@ -802,10 +818,10 @@ static uint32_t test_event_condition(bool all,
>> uint32_t num_events,
>> uint32_t activated_count = 0;
>> for (i = 0; i < num_events; i++) {
>> - if (!event_waiters[i].event)
>> + if (!READ_ONCE(event_waiters[i].event))
>> return KFD_IOC_WAIT_RESULT_FAIL;
>> - if (event_waiters[i].activated) {
>> + if (READ_ONCE(event_waiters[i].activated)) {
>> if (!all)
>> return KFD_IOC_WAIT_RESULT_COMPLETE;
>> @@ -834,6 +850,8 @@ static int copy_signaled_event_data(uint32_t
>> num_events,
>> for (i = 0; i < num_events; i++) {
>> waiter = &event_waiters[i];
>> event = waiter->event;
>> + if (!event)
>> + return -EINVAL; /* event was destroyed */
>> if (waiter->activated && event->type ==
>> KFD_EVENT_TYPE_MEMORY) {
>> dst = &data[i].memory_exception_data;
>> src = &event->memory_exception_data;
>> @@ -844,11 +862,8 @@ static int copy_signaled_event_data(uint32_t
>> num_events,
>> }
>> return 0;
>> -
>> }
>> -
>> -
>> static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
>> {
>> if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
>> @@ -872,9 +887,12 @@ static void free_waiters(uint32_t num_events,
>> struct kfd_event_waiter *waiters)
>> uint32_t i;
>> for (i = 0; i < num_events; i++)
>> - if (waiters[i].event)
>> + if (waiters[i].event) {
>> + spin_lock(&waiters[i].event->lock);
>> remove_wait_queue(&waiters[i].event->wq,
>> &waiters[i].wait);
>> + spin_unlock(&waiters[i].event->lock);
>> + }
>> kfree(waiters);
>> }
>> @@ -898,6 +916,9 @@ int kfd_wait_on_events(struct kfd_process *p,
>> goto out;
>> }
>> + /* Use p->event_mutex here to protect against concurrent
>> creation and
>> + * destruction of events while we initialize event_waiters.
>> + */
>> mutex_lock(&p->event_mutex);
>> for (i = 0; i < num_events; i++) {
>> @@ -976,14 +997,19 @@ int kfd_wait_on_events(struct kfd_process *p,
>> }
>> __set_current_state(TASK_RUNNING);
>> + mutex_lock(&p->event_mutex);
>> /* copy_signaled_event_data may sleep. So this has to happen
>> * after the task state is set back to RUNNING.
>> + *
>> + * The event may also have been destroyed after signaling. So
>> + * copy_signaled_event_data also must confirm that the event
>> + * still exists. Therefore this must be under the p->event_mutex
>> + * which is also held when events are destroyed.
>> */
>> if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
>> ret = copy_signaled_event_data(num_events,
>> event_waiters, events);
>> - mutex_lock(&p->event_mutex);
>> out_unlock:
>> free_waiters(num_events, event_waiters);
>> mutex_unlock(&p->event_mutex);
>> @@ -1042,8 +1068,7 @@ int kfd_event_mmap(struct kfd_process *p,
>> struct vm_area_struct *vma)
>> }
>> /*
>> - * Assumes that p->event_mutex is held and of course
>> - * that p is not going away (current or locked).
>> + * Assumes that p is not going away.
>> */
>> static void lookup_events_by_type_and_signal(struct kfd_process *p,
>> int type, void *event_data)
>> @@ -1055,6 +1080,8 @@ static void
>> lookup_events_by_type_and_signal(struct kfd_process *p,
>> ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
>> + rcu_read_lock();
>> +
>> id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>> idr_for_each_entry_continue(&p->event_idr, ev, id)
>> if (ev->type == type) {
>> @@ -1062,9 +1089,11 @@ static void
>> lookup_events_by_type_and_signal(struct kfd_process *p,
>> dev_dbg(kfd_device,
>> "Event found: id %X type %d",
>> ev->event_id, ev->type);
>> + spin_lock(&ev->lock);
>> set_event(ev);
>> if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
>> ev->memory_exception_data = *ev_data;
>> + spin_unlock(&ev->lock);
>> }
>> if (type == KFD_EVENT_TYPE_MEMORY) {
>> @@ -1087,6 +1116,8 @@ static void
>> lookup_events_by_type_and_signal(struct kfd_process *p,
>> p->lead_thread->pid, p->pasid);
>> }
>> }
>> +
>> + rcu_read_unlock();
>> }
>> #ifdef KFD_SUPPORT_IOMMU_V2
>> @@ -1162,16 +1193,10 @@ void kfd_signal_iommu_event(struct kfd_dev
>> *dev, u32 pasid,
>> if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
>> KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
>> - KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) {
>> - mutex_lock(&p->event_mutex);
>> -
>> - /* Lookup events by type and signal them */
>> + KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
>> lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
>> &memory_exception_data);
>> - mutex_unlock(&p->event_mutex);
>> - }
>> -
>> kfd_unref_process(p);
>> }
>> #endif /* KFD_SUPPORT_IOMMU_V2 */
>> @@ -1188,12 +1213,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
>> if (!p)
>> return; /* Presumably process exited. */
>> - mutex_lock(&p->event_mutex);
>> -
>> - /* Lookup events by type and signal them */
>> lookup_events_by_type_and_signal(p,
>> KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
>> -
>> - mutex_unlock(&p->event_mutex);
>> kfd_unref_process(p);
>> }
>> @@ -1229,16 +1249,19 @@ void kfd_signal_vm_fault_event(struct
>> kfd_dev *dev, u32 pasid,
>> info->prot_write ? 1 : 0;
>> memory_exception_data.failure.imprecise = 0;
>> }
>> - mutex_lock(&p->event_mutex);
>> +
>> + rcu_read_lock();
>> id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>> idr_for_each_entry_continue(&p->event_idr, ev, id)
>> if (ev->type == KFD_EVENT_TYPE_MEMORY) {
>> + spin_lock(&ev->lock);
>> ev->memory_exception_data = memory_exception_data;
>> set_event(ev);
>> + spin_unlock(&ev->lock);
>> }
>> - mutex_unlock(&p->event_mutex);
>> + rcu_read_unlock();
>> kfd_unref_process(p);
>> }
>> @@ -1272,22 +1295,28 @@ void kfd_signal_reset_event(struct kfd_dev
>> *dev)
>> continue;
>> }
>> - mutex_lock(&p->event_mutex);
>> + rcu_read_lock();
>> +
>> id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>> idr_for_each_entry_continue(&p->event_idr, ev, id) {
>> if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
>> + spin_lock(&ev->lock);
>> ev->hw_exception_data = hw_exception_data;
>> ev->hw_exception_data.gpu_id = user_gpu_id;
>> set_event(ev);
>> + spin_unlock(&ev->lock);
>> }
>> if (ev->type == KFD_EVENT_TYPE_MEMORY &&
>> reset_cause == KFD_HW_EXCEPTION_ECC) {
>> + spin_lock(&ev->lock);
>> ev->memory_exception_data = memory_exception_data;
>> ev->memory_exception_data.gpu_id = user_gpu_id;
>> set_event(ev);
>> + spin_unlock(&ev->lock);
>> }
>> }
>> - mutex_unlock(&p->event_mutex);
>> +
>> + rcu_read_unlock();
>> }
>> srcu_read_unlock(&kfd_processes_srcu, idx);
>> }
>> @@ -1320,19 +1349,25 @@ void kfd_signal_poison_consumed_event(struct
>> kfd_dev *dev, u32 pasid)
>> memory_exception_data.gpu_id = user_gpu_id;
>> memory_exception_data.failure.imprecise = true;
>> - mutex_lock(&p->event_mutex);
>> + rcu_read_lock();
>> +
>> idr_for_each_entry_continue(&p->event_idr, ev, id) {
>> if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
>> + spin_lock(&ev->lock);
>> ev->hw_exception_data = hw_exception_data;
>> set_event(ev);
>> + spin_unlock(&ev->lock);
>> }
>> if (ev->type == KFD_EVENT_TYPE_MEMORY) {
>> + spin_lock(&ev->lock);
>> ev->memory_exception_data = memory_exception_data;
>> set_event(ev);
>> + spin_unlock(&ev->lock);
>> }
>> }
>> - mutex_unlock(&p->event_mutex);
>> +
>> + rcu_read_unlock();
>> /* user application will handle SIGBUS signal */
>> send_sig(SIGBUS, p->lead_thread, 0);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
>> index 1238af11916e..55d376f56021 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
>> @@ -59,6 +59,7 @@ struct kfd_event {
>> int type;
>> + spinlock_t lock;
>> wait_queue_head_t wq; /* List of event waiters. */
>> /* Only for signal events. */
>