[PATCH v6] drm/amdkfd: Provide SMI events watch
Amber Lin
Amber.Lin at amd.com
Sat Apr 18 01:48:31 UTC 2020
On 2020-04-17 6:31 p.m., Felix Kuehling wrote:
> Am 2020-04-17 um 4:07 p.m. schrieb Amber Lin:
>> When the compute is malfunctioning or performance drops, the system admin
>> will use SMI (System Management Interface) tool to monitor/diagnostic what
>> went wrong. This patch provides an event watch interface for the user
>> space to register devices and subscribe events they are interested. After
>> registered, the user can use annoymous file descriptor's poll function
>> with wait-time specified and wait for events to happen. Once an event
>> happens, the user can use read() to retrieve information related to the
>> event.
>>
>> VM fault event is done in this patch.
>>
>> v2: - remove UNREGISTER and add event ENABLE/DISABLE
>> - correct kfifo usage
>> - move event message API to kfd_ioctl.h
>> v3: send the event msg in text than in binary
>> v4: support multiple clients
>> v5: move events enablement from ioctl to fd write
>>
>> Signed-off-by: Amber Lin <Amber.Lin at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/Makefile | 1 +
>> drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2 +
>> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 18 ++
>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 7 +
>> drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 +
>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +
>> drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 215 +++++++++++++++++++++++
>> drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 29 +++
>> include/uapi/linux/kfd_ioctl.h | 16 +-
>> 9 files changed, 293 insertions(+), 1 deletion(-)
>> create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>> create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
>> index 6147462..e1e4115 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
>> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
>> @@ -53,6 +53,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
>> $(AMDKFD_PATH)/kfd_int_process_v9.o \
>> $(AMDKFD_PATH)/kfd_dbgdev.o \
>> $(AMDKFD_PATH)/kfd_dbgmgr.o \
>> + $(AMDKFD_PATH)/kfd_smi_events.o \
>> $(AMDKFD_PATH)/kfd_crat.o
>>
>> ifneq ($(CONFIG_AMD_IOMMU_V2),)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
>> index 9f59ba9..24b4717 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
>> @@ -24,6 +24,7 @@
>> #include "kfd_events.h"
>> #include "cik_int.h"
>> #include "amdgpu_amdkfd.h"
>> +#include "kfd_smi_events.h"
>>
>> static bool cik_event_interrupt_isr(struct kfd_dev *dev,
>> const uint32_t *ih_ring_entry,
>> @@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
>> ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
>> struct kfd_vm_fault_info info;
>>
>> + kfd_smi_event_update_vmfault(dev, pasid);
>> kfd_process_vm_fault(dev->dqm, pasid);
>>
>> memset(&info, 0, sizeof(info));
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index f8fa03a..2baaaec 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -39,6 +39,7 @@
>> #include "kfd_device_queue_manager.h"
>> #include "kfd_dbgmgr.h"
>> #include "amdgpu_amdkfd.h"
>> +#include "kfd_smi_events.h"
>>
>> static long kfd_ioctl(struct file *, unsigned int, unsigned long);
>> static int kfd_open(struct inode *, struct file *);
>> @@ -1732,6 +1733,20 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
>> return r;
>> }
>>
>> +/* Handle requests for watching SMI events */
>> +static int kfd_ioctl_smi_events(struct file *filep,
>> + struct kfd_process *p, void *data)
>> +{
>> + struct kfd_ioctl_smi_events_args *args = data;
>> + struct kfd_dev *dev;
>> +
>> + dev = kfd_device_by_id(args->gpuid);
>> + if (!dev)
>> + return -EINVAL;
>> +
>> + return kfd_smi_event_open(dev, &args->anon_fd);
>> +}
>> +
>> #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
>> [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
>> .cmd_drv = 0, .name = #ioctl}
>> @@ -1827,6 +1842,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>>
>> AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
>> kfd_ioctl_alloc_queue_gws, 0),
>> +
>> + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
>> + kfd_ioctl_smi_events, 0),
>> };
>>
>> #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 0491ab2..2c030c2 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -586,6 +586,11 @@ static int kfd_gws_init(struct kfd_dev *kfd)
>> return ret;
>> }
>>
>> +static void kfd_smi_init(struct kfd_dev *dev) {
>> + INIT_LIST_HEAD(&dev->smi_clients);
>> + spin_lock_init(&dev->smi_lock);
>> +}
>> +
>> bool kgd2kfd_device_init(struct kfd_dev *kfd,
>> struct drm_device *ddev,
>> const struct kgd2kfd_shared_resources *gpu_resources)
>> @@ -700,6 +705,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>> goto kfd_topology_add_device_error;
>> }
>>
>> + kfd_smi_init(kfd);
>> +
>> kfd->init_complete = true;
>> dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
>> kfd->pdev->device);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>> index e05d75e..151e83e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>> @@ -24,6 +24,7 @@
>> #include "kfd_events.h"
>> #include "soc15_int.h"
>> #include "kfd_device_queue_manager.h"
>> +#include "kfd_smi_events.h"
>>
>> static bool event_interrupt_isr_v9(struct kfd_dev *dev,
>> const uint32_t *ih_ring_entry,
>> @@ -117,6 +118,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
>> info.prot_read = ring_id & 0x10;
>> info.prot_write = ring_id & 0x20;
>>
>> + kfd_smi_event_update_vmfault(dev, pasid);
>> kfd_process_vm_fault(dev->dqm, pasid);
>> kfd_signal_vm_fault_event(dev, pasid, &info);
>> }
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 43b888b..dc873b0 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -309,6 +309,10 @@ struct kfd_dev {
>>
>> /* Global GWS resource shared b/t processes*/
>> void *gws;
>> +
>> + /* Clients watching SMI events */
>> + struct list_head smi_clients;
>> + spinlock_t smi_lock;
>> };
>>
>> enum kfd_mempool {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>> new file mode 100644
>> index 0000000..137aec8
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>> @@ -0,0 +1,215 @@
>> +/*
>> + * Copyright 2020 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + */
>> +
>> +#include <linux/poll.h>
>> +#include <linux/wait.h>
>> +#include <linux/anon_inodes.h>
>> +#include <uapi/linux/kfd_ioctl.h>
>> +#include "amdgpu_vm.h"
>> +#include "kfd_priv.h"
>> +#include "kfd_smi_events.h"
>> +
>> +struct kfd_smi_client {
>> + struct list_head list;
>> + struct kfifo fifo;
>> + wait_queue_head_t wait_queue;
>> + /* events enabled */
>> + uint64_t events;
>> + struct kfd_dev *dev;
>> + spinlock_t lock;
>> +};
>> +
>> +#define MAX_KFIFO_SIZE 1024
>> +
>> +static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
>> +static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
>> +static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t,
>> + loff_t *);
>> +static int kfd_smi_ev_release(struct inode *, struct file *);
>> +
>> +static const char kfd_smi_name[] = "kfd_smi_ev";
>> +
>> +static const struct file_operations kfd_smi_ev_fops = {
>> + .owner = THIS_MODULE,
>> + .poll = kfd_smi_ev_poll,
>> + .read = kfd_smi_ev_read,
>> + .write = kfd_smi_ev_write,
>> + .release = kfd_smi_ev_release
>> +};
>> +
>> +static __poll_t kfd_smi_ev_poll(struct file *filep,
>> + struct poll_table_struct *wait)
>> +{
>> + __poll_t mask;
>> + struct kfd_smi_client *client = filep->private_data;
>> +
>> + poll_wait(filep, &client->wait_queue, wait);
>> +
>> + spin_lock(&client->lock);
>> + mask = kfifo_is_empty(&client->fifo) ? 0: POLLIN | POLLRDNORM;
>> + spin_unlock(&client->lock);
>> +
>> + return mask;
>> +}
>> +
>> +static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
>> + size_t size, loff_t *offset)
>> +{
>> + int ret, to_copy;
>> + struct kfd_smi_client *client = filep->private_data;
>> + unsigned char buf[MAX_KFIFO_SIZE];
> If you grow the MAX_KFIFO_SIZE in the future, this will lead to a stack
> overflow. It's OK for now. Maybe add a BUILD_BUG_ON(MAX_KFIFO_SIZE >
> 1024) to make it obvious for anyone who wants to grow the FIFO size later.
>
I have changed kfifo_alloc to use MAX_KFIFO_SIZE (not hard coded 1024)
when changing kfifo_to_user to kfifo_out, so this doesn't overflow. When
people grow the fifo size, they change MAX_KFIFO_SIZE definition.
>> +
>> + /* kfifo_to_user can sleep so we can't use spinlock protection around
>> + * it. Instead, we kfifo out as spinlocked then copy them to the user.
>> + */
>> + spin_lock(&client->lock);
>> + to_copy = kfifo_len(&client->fifo);
>> + if (!to_copy) {
>> + spin_unlock(&client->lock);
>> + return -EAGAIN;
>> + }
>> + to_copy = to_copy > size ? size : to_copy;
> You could use min(size, to_copy) here. But you also need to check
> against sizeof(buf), otherwise you risk overwriting your stack. Maybe
> min3(size, sizeof(buf), to_copy).
>
Same reason as above, kfifo_len should never return greater than
MAX_KFIFO_SIZE, so min(size, to_copy) should be good enough.
>> + ret = kfifo_out(&client->fifo, buf, to_copy);
>> + spin_unlock(&client->lock);
>> + if (ret <= 0)
>> + return -EAGAIN;
>> +
>> + ret = copy_to_user(user, buf, to_copy);
>> + if (ret) {
>> + pr_debug("smi-events: fail to send msg (%i) (%i)\n",
>> + ret, to_copy);
>> + return ret;
>> + }
>> +
>> + return to_copy;
>> +}
>> +
>> +static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
>> + size_t size, loff_t *offset)
>> +{
>> + struct kfd_smi_client *client = filep->private_data;
>> + uint64_t events;
>> +
>> + if (!access_ok(user, size) || size < sizeof(events))
>> + return -EFAULT;
>> + if (copy_from_user(&events, user, sizeof(events)))
>> + return -EFAULT;
>> +
>> + WRITE_ONCE(client->events, events);
>> +
>> + return sizeof(events);
>> +}
>> +
>> +static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
>> +{
>> + struct kfd_smi_client *client = filep->private_data;
>> + struct kfd_dev *dev = client->dev;
>> +
>> + spin_lock(&dev->smi_lock);
>> + list_del_rcu(&client->list);
>> + spin_unlock(&dev->smi_lock);
>> +
>> + synchronize_rcu();
>> + kfifo_free(&client->fifo);
>> + kfree(client);
>> +
>> + return 0;
>> +}
>> +
>> +void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
>> +{
>> + struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
>> + struct amdgpu_task_info task_info;
>> + /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
>> + /* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
>> + */
>> + char fifo_in[43];
>> + struct kfd_smi_client *client;
>> +
>> + rcu_read_lock();
>> + if (list_empty(&dev->smi_clients)){
>> + rcu_read_unlock();
>> + return;
>> + }
>> + rcu_read_unlock();
> I don't think you need an rcu_read_lock for this atomic list_empty check.
I'll remove them.
>
>> +
>> + amdgpu_vm_get_task_info(adev, pasid, &task_info);
>> + snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
>> + task_info.pid, task_info.task_name);
>> +
>> + rcu_read_lock();
>> +
>> + list_for_each_entry_rcu(client, &dev->smi_clients, list) {
>> + if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT))
>> + continue;
>> + spin_lock(&client->lock);
>> + if (kfifo_avail(&client->fifo) < sizeof(fifo_in)) {
>> + spin_unlock(&client->lock);
>> + rcu_read_unlock();
>> + pr_debug("smi_event(vmfault): no space left\n");
>> + return;
> If one client is overflowing, that should not stop event delivery to
> other clients. That would allow one client to stage a denial of service
> attack on other clients simply by never reading its events. So exiting
> here is wrong. You should just continue with the next client.
Right. My mistake. I'll change it.
>
>> + }
>> + kfifo_in(&client->fifo, fifo_in, sizeof(fifo_in));
>> + wake_up_all(&client->wait_queue);
>> + spin_unlock(&client->lock);
>> + }
>> +
>> + rcu_read_unlock();
>> +}
>> +
>> +int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
>> +{
>> + struct kfd_smi_client *client;
>> + int ret;
>> +
>> + client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL);
>> + if (!client)
>> + return -ENOMEM;
>> + INIT_LIST_HEAD(&client->list);
>> +
>> + ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
>> + if (ret) {
>> + kfree(client);
>> + return ret;
>> + }
>> +
>> + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
>> + O_RDWR);
>> + if (ret < 0) {
>> + kfifo_free(&client->fifo);
>> + kfree(client);
>> + *fd = 0;
> You don't need to overwrite *fd here. If the function fails, it should
> not write its output parameter.
I just feel that I shouldn't leave args.anon_fd as-is if the user didn't
initialize it as an invalid number.
>
>
>> + return ret;
>> + }
>> + *fd = ret;
>> +
>> + init_waitqueue_head(&client->wait_queue);
>> + spin_lock_init(&client->lock);
>> + client->events = 0;
>> + client->dev = dev;
>> +
>> + spin_lock(&dev->smi_lock);
>> + list_add_rcu(&client->list, &dev->smi_clients);
>> + spin_unlock(&dev->smi_lock);
>> +
>> + return 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
>> new file mode 100644
>> index 0000000..a9cb218
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
>> @@ -0,0 +1,29 @@
>> +/*
>> + * Copyright 2020 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + */
>> +
>> +#ifndef KFD_SMI_EVENTS_H_INCLUDED
>> +#define KFD_SMI_EVENTS_H_INCLUDED
>> +
>> +int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
>> +void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
>> +
>> +#endif
>> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
>> index 4f66764..ad33c18 100644
>> --- a/include/uapi/linux/kfd_ioctl.h
>> +++ b/include/uapi/linux/kfd_ioctl.h
> As we discussed, we should bump the KFD_ICOTL_MINOR_VERSION for this
> change. Skip version 1.2 and go straight to 1.3 because version 1.2 is
> buing used by the DKMS builds.
>
> Regards,
> Felix
Yes I do plan to have a 2nd patch following this one once you are happy
with my patch :)
>
>> @@ -442,6 +442,17 @@ struct kfd_ioctl_import_dmabuf_args {
>> __u32 dmabuf_fd; /* to KFD */
>> };
>>
>> +/*
>> + * KFD SMI(System Management Interface) events
>> + */
>> +/* Event type (defined by bitmask) */
>> +#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001
>> +
>> +struct kfd_ioctl_smi_events_args {
>> + __u32 gpuid; /* to KFD */
>> + __u32 anon_fd; /* from KFD */
>> +};
>> +
>> /* Register offset inside the remapped mmio page
>> */
>> enum kfd_mmio_remap {
>> @@ -546,7 +557,10 @@ enum kfd_mmio_remap {
>> #define AMDKFD_IOC_ALLOC_QUEUE_GWS \
>> AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
>>
>> +#define AMDKFD_IOC_SMI_EVENTS \
>> + AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args)
>> +
>> #define AMDKFD_COMMAND_START 0x01
>> -#define AMDKFD_COMMAND_END 0x1F
>> +#define AMDKFD_COMMAND_END 0x20
>>
>> #endif
More information about the amd-gfx
mailing list