[PATCH] drm/amdkfd: Replace bitmask with event idx in SMI event msg

Felix Kuehling felix.kuehling at amd.com
Mon Jul 27 23:42:19 UTC 2020


Am 2020-07-26 um 5:24 p.m. schrieb Mukul Joshi:
> Event bitmask is a 64-bit mask with only 1 bit set. Sending this
> event bitmask in KFD SMI event message is both wasteful of memory
> and potentially limiting to only 64 events. Instead send event
> index in SMI event message.

Please add a statement here, that for the two event types defined so
far, this change does not break the ABI. The new index will be identical
to the mask used before.


>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
> Suggested-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 24 +++++++++++----------
>  include/uapi/linux/kfd_ioctl.h              | 10 ++++++---
>  2 files changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> index 86c2c3e97944..4d4b6e3ab697 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> @@ -149,7 +149,7 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
>  	return 0;
>  }
>  
> -static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long smi_event,
> +static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
>  			      char *event_msg, int len)
>  {
>  	struct kfd_smi_client *client;
> @@ -157,14 +157,15 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long smi_event
>  	rcu_read_lock();
>  
>  	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
> -		if (!(READ_ONCE(client->events) & smi_event))
> +		if (!(READ_ONCE(client->events) &
> +				KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
>  			continue;
>  		spin_lock(&client->lock);
>  		if (kfifo_avail(&client->fifo) >= len) {
>  			kfifo_in(&client->fifo, event_msg, len);
>  			wake_up_all(&client->wait_queue);
>  		} else {
> -			pr_debug("smi_event(EventID: %llu): no space left\n",
> +			pr_debug("smi_event(EventID: %u): no space left\n",
>  					smi_event);
>  		}
>  		spin_unlock(&client->lock);
> @@ -180,21 +181,21 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
>  	/*
>  	 * ThermalThrottle msg = throttle_bitmask(8):
>  	 * 			 thermal_interrupt_count(16):
> -	 * 16 bytes event + 1 byte space + 8 byte throttle_bitmask +
> +	 * 1 byte event + 1 byte space + 8 byte throttle_bitmask +
>  	 * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
> -	 * 1 byte \0 = 44
> +	 * 1 byte \0 = 29
>  	 */
> -	char fifo_in[44];
> +	char fifo_in[29];
>  	int len;
>  
>  	if (list_empty(&dev->smi_clients))
>  		return;
>  
> -	len = snprintf(fifo_in, 44, "%x %x:%llx\n",
> +	len = snprintf(fifo_in, 29, "%x %x:%llx\n",
>  		       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
>  		       atomic64_read(&adev->smu.throttle_int_counter));
>  
> -	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
> +	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE,	fifo_in, len);
>  }
>  
>  void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
> @@ -202,9 +203,10 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
>  	struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
>  	struct amdgpu_task_info task_info;
>  	/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
> -	/* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
> +	/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
> +	 * 1 byte \0 = 29
>  	 */
> -	char fifo_in[43];
> +	char fifo_in[29];
>  	int len;
>  
>  	if (list_empty(&dev->smi_clients))
> @@ -216,7 +218,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
>  	if (!task_info.pid)
>  		return;
>  
> -	len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
> +	len = snprintf(fifo_in, 29, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
>  		task_info.pid, task_info.task_name);
>  
>  	add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index df6c7a43aadc..796f836ba773 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -449,9 +449,13 @@ struct kfd_ioctl_import_dmabuf_args {
>  /*
>   * KFD SMI(System Management Interface) events
>   */
> -/* Event type (defined by bitmask) */
> -#define KFD_SMI_EVENT_VMFAULT			0x0000000000000001
> -#define KFD_SMI_EVENT_THERMAL_THROTTLE		0x0000000000000002
> +enum kfd_smi_event {
> +        KFD_SMI_EVENT_NONE = 0, /* not used */
> +        KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
> +        KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
> +};
> +
> +#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << (i - 1))

It's common best practice to wrap all macro parameters in ().

With that fixed, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>

Thanks,
  Felix


>  
>  struct kfd_ioctl_smi_events_args {
>  	__u32 gpuid;	/* to KFD */


More information about the amd-gfx mailing list