[Intel-gfx] [PATCH 6/6] drm/i915: expose rcs topology through query uAPI

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Jan 11 12:45:06 UTC 2018


On 18/12/2017 15:35, Lionel Landwerlin wrote:
> With the introduction of asymetric slices in CNL, we cannot rely on

asymmetric

> the previous SUBSLICE_MASK getparam to tell userspace what subslices
> are available. Here we introduce a more detailed way of querying the
> Gen's GPU topology that doesn't aggregate numbers.
> 
> This is essential for monitoring parts of the GPU with the OA unit,
> because counters need to be normalized to the number of
> EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
> not gives us sufficient information.
> 
> As a bonus we can draw representations of the GPU :
> 
>          https://imgur.com/a/vuqpa
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_query.c       | 134 ++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_query_info.c |  88 +++++++++++++++++++++
>   include/uapi/drm/i915_drm.h             |  45 +++++++++++
>   3 files changed, 267 insertions(+)
>   create mode 100644 drivers/gpu/drm/i915/intel_query_info.c
> 
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 227a28978190..7c0eb09d3aac 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -25,24 +25,158 @@
>   #include "i915_drv.h"
>   #include <uapi/drm/i915_drm.h>
>   
> +static int query_slices_mask(struct drm_i915_private *dev_priv,
> +			     struct drm_i915_query_item *query_item)
> +{
> +	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> +	struct drm_i915_query_slices_mask slices_info;
> +	u32 data_length, length;
> +
> +	if (sseu->max_slices == 0)
> +		return -ENODEV;

Do you need to handle this explicitly or just return all zeros? I guess 
I don't know which GPUs have zero slices.

> +
> +	memset(&slices_info, 0, sizeof(slices_info));

Is this needed since a) you will write to the only field just below, and 
b) it won't clear the tail data array anyway?

> +
> +	slices_info.n_slices = sseu->max_slices;
Move to after the query_item->length checks.

> +
> +	data_length = sizeof(u8);

Size is dependant on number of slices? DIV_ROUND_UP(n_slices, 
BITS_PER_BYTE) ?

> +	length = sizeof(struct drm_i915_query_slices_mask) + data_length;

sizeof(slices_info)

> +
> +	/*
> +	 * If we ever change the internal slice mask data type, we'll need to
> +	 * update this function.
> +	 */
> +	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));

Hm a bit weak since it is not only about size but also data 
representation. But I don't have any better ideas.

> +
> +	if (query_item->length == 0) {
> +		query_item->length = length;
> +		return 0;
> +	}
> +
> +	if (query_item->length != length)
> +		return -EINVAL;
> +
> +	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr), &slices_info,
> +			 sizeof(slices_info)))
> +		return -EFAULT;
> +
> +	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> +					 offsetof(struct drm_i915_query_slices_mask, data)),
> +			 &sseu->slice_mask, data_length))
> +		return -EFAULT;
> +
> +	return 0;
> +}

Some of the above comments also apply to the other two query functions 
below.

> +static int query_subslices_mask(struct drm_i915_private *dev_priv,
> +				struct drm_i915_query_item *query_item)
> +{
> +	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> +	struct drm_i915_query_subslices_mask subslices_info;
> +	u32 data_length, length;
> +
> +	if (sseu->max_slices == 0)
> +		return -ENODEV;
> +
> +	memset(&subslices_info, 0, sizeof(subslices_info));
> +
> +	subslices_info.n_slices = sseu->max_slices;
> +	subslices_info.slice_stride = ALIGN(sseu->max_subslices, 8) / 8;
> +
> +	data_length = subslices_info.n_slices * subslices_info.slice_stride;
> +	length = sizeof(struct drm_i915_query_subslices_mask) + data_length;
> +
> +	if (query_item->length == 0) {
> +		query_item->length = length;
> +		return 0;
> +	}
> +
> +	if (query_item->length != length)
> +		return -EINVAL;
> +
> +	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr), &subslices_info,
> +			 sizeof(subslices_info)))
> +		return -EFAULT;
> +
> +	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> +					 offsetof(struct drm_i915_query_subslices_mask, data)),
> +			 sseu->subslices_mask, data_length))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
> +static int query_eus_mask(struct drm_i915_private *dev_priv,
> +			  struct drm_i915_query_item *query_item)
> +{
> +	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> +	struct drm_i915_query_eus_mask eus_info;
> +	u32 data_length, length;
> +
> +	if (sseu->max_slices == 0)
> +		return -ENODEV;
> +
> +	memset(&eus_info, 0, sizeof(eus_info));
> +
> +	eus_info.subslice_stride = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
> +	eus_info.slice_stride = sseu->max_subslices * eus_info.subslice_stride;
> +	eus_info.n_slices = sseu->max_slices;
> +
> +	data_length = eus_info.n_slices * eus_info.slice_stride;
> +	length = sizeof(struct drm_i915_query_eus_mask) + data_length;
> +
> +	if (query_item->length == 0) {
> +		query_item->length = length;
> +		return 0;
> +	}
> +
> +	if (query_item->length != length)
> +		return -EINVAL;
> +
> +	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr), &eus_info,
> +			 sizeof(eus_info)))
> +		return -EFAULT;
> +
> +	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> +					 offsetof(struct drm_i915_query_eus_mask, data)),
> +			 sseu->eu_mask, data_length))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>   int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   {
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>   	struct drm_i915_query *args = data;
>   	u32 i;
>   
>   	for (i = 0; i < args->num_items; i++) {
>   		struct drm_i915_query_item item;
>   		u64 item_user_ptr = args->items_ptr + sizeof(item) * i;
> +		int ret;
>   
>   		if (copy_from_user(&item, u64_to_user_ptr(item_user_ptr),
>   				   sizeof(item)))
>   			return -EFAULT;
>   
>   		switch (item.query_id) {
> +		case DRM_I915_QUERY_ID_SLICES_MASK:
> +			ret = query_slices_mask(dev_priv, &item);
> +			break;
> +		case DRM_I915_QUERY_ID_SUBSLICES_MASK:
> +			ret = query_subslices_mask(dev_priv, &item);
> +			break;
> +		case DRM_I915_QUERY_ID_EUS_MASK:
> +			ret = query_eus_mask(dev_priv, &item);
> +			break;
>   		default:
>   			return -EINVAL;
>   		}
>   
> +		if (ret)
> +			return ret;
> +
>   		if (copy_to_user(u64_to_user_ptr(item_user_ptr), &item,
>   				 sizeof(item)))
>   			return -EFAULT;
> diff --git a/drivers/gpu/drm/i915/intel_query_info.c b/drivers/gpu/drm/i915/intel_query_info.c
> new file mode 100644
> index 000000000000..79b03be9f51a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_query_info.c
> @@ -0,0 +1,88 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "i915_drv.h"
> +#include <uapi/drm/i915_drm.h>
> +
> +static int query_info_rcs_topology(struct drm_i915_private *dev_priv,
> +				   struct drm_i915_query_info *args)

Unused artefact of a previous version?

> +{
> +	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> +	struct drm_i915_rcs_topology_info __user *user_topology =
> +		u64_to_user_ptr(args->info_ptr);
> +	struct drm_i915_rcs_topology_info topology;
> +	u32 data_size, total_size;
> +	const u8 *data = NULL;
> +	int ret;
> +
> +	/* Not supported on gen < 8. */
> +	if (sseu->max_slices == 0)
> +		return -ENODEV;
> +
> +	switch (args->query_params[0]) {
> +	case I915_RCS_TOPOLOGY_SLICE:
> +		topology.params[0] = sseu->max_slices;
> +		data_size = sizeof(sseu->slice_mask);
> +		data = &sseu->slice_mask;
> +		break;
> +
> +	case I915_RCS_TOPOLOGY_SUBSLICE:
> +		topology.params[0] = sseu->max_slices;
> +		topology.params[1] = ALIGN(sseu->max_subslices, 8) / 8;
> +		data_size = sseu->max_slices * topology.params[1];
> +		data = sseu->subslices_mask;
> +		break;
> +
> +	case I915_RCS_TOPOLOGY_EU:
> +		topology.params[2] = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
> +		topology.params[1] = sseu->max_subslices * topology.params[2];
> +		topology.params[0] = sseu->max_slices;
> +		data_size = sseu->max_slices * topology.params[1];
> +		data = sseu->eu_mask;
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	total_size = sizeof(topology) + data_size;
> +
> +	if (args->info_ptr_len == 0) {
> +		args->info_ptr_len = total_size;
> +		return 0;
> +	}
> +
> +	if (args->info_ptr_len < total_size)
> +		return -EINVAL;
> +
> +	ret = copy_to_user(user_topology, &topology, sizeof(topology));
> +	if (ret)
> +		return -EFAULT;
> +
> +	ret = copy_to_user(user_topology + 1, data, data_size);
> +	if (ret)
> +		return -EFAULT;
> +
> +	return 0;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 87dd8b15548c..cdfbfcb123d0 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1618,6 +1618,9 @@ struct drm_i915_perf_oa_config {
>   
>   struct drm_i915_query_item {
>   	__u32 query_id;
> +#define DRM_I915_QUERY_ID_SLICES_MASK    0x01
> +#define DRM_I915_QUERY_ID_SUBSLICES_MASK 0x02
> +#define DRM_I915_QUERY_ID_EUS_MASK       0x03
>   
>   	/*
>   	 * When left to 0 by userspace, this is filled with the size the data
> @@ -1644,6 +1647,48 @@ struct drm_i915_query {
>   	__u64 items_ptr;
>   };
>   
> +/* Data written by the kernel with query DRM_I915_QUERY_ID_SLICES_MASK :
> + *
> + * data: each bit indicates whether a slice is available (1) or fused off
> + * (0). Formula to tell if slice X is available :
> + *
> + *         (data[X / 8] >> (X % 8)) & 1

Add a helper taking struct drm_i915_query_slices_mask for userspace to 
do this. Macro would be good I think.

> + */
> +struct drm_i915_query_slices_mask {

Rename all uapi struct to _info instead of _mask suffix?

> +	__u32 n_slices;
> +
> +	__u8 data[];

Is a zero size array a GCC extension or something? I somehow seem to 
remember someone was complaining about this.

> +};
> +
> +/* Data written by the kernel with query DRM_I915_QUERY_ID_SUBSLICES_MASK :
> + *
> + * data: each bit indicates whether a subslice is available (1) or fused off
> + * (0). Formula to tell if slice X subslice Y is available :
> + *
> + *         (data[(X * slice_stride) + Y / 8] >> (Y % 8)) & 1
> + */
> +struct drm_i915_query_subslices_mask {
> +	__u32 n_slices;
> +	__u32 slice_stride;

I think it would be better to return subslice_max and also add a query 
helper like above.

> +
> +	__u8 data[];
> +};
> +
> +/* Data written by the kernel with query DRM_I915_QUERY_ID_EUS_MASK :
> + *
> + * data: Each bit indicates whether a subslice is available (1) or fused off
> + * (0). Formula to tell if slice X subslice Y eu Z is available :
> + *
> + *         (data[X * slice_stride + Y * subslice_stride + Z / 8] >> (Z % 8)) & 1
> + */
> +struct drm_i915_query_eus_mask {
> +	__u32 n_slices;
> +	__u32 slice_stride;
> +	__u32 subslice_stride;

Same as above, I think max_slices, max_sublices and a query helper would 
be much better.

> +
> +	__u8 data[];
> +};
> +
>   #if defined(__cplusplus)
>   }
>   #endif
> 

Regards,

Tvrtko


More information about the Intel-gfx mailing list