[Intel-gfx] [PATCH 4/4] drm/i915: expose rcs topology through discovery uAPI
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Nov 9 17:34:59 UTC 2017
On 08/11/2017 16:22, Lionel Landwerlin wrote:
> With the introduction of asymetric slices in CNL, we cannot rely on
> the previous SUBSLICE_MASK getparam. Here we introduce a more detailed
> way of querying the Gen's GPU topology that doesn't aggregate numbers.
>
> This is essential for monitoring parts of the GPU with the OA unit,
> because counters need to be normalized to the number of
> EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
> not gives us sufficient information.
>
> This change introduce a new way to query properties of the GPU, making
> room for new queries (some media related topology could be exposed in
> the future).
>
> As a bonus we can draw representations of the GPU :
>
> https://imgur.com/a/vuqpa
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> drivers/gpu/drm/i915/intel_query_info.c | 64 +++++++++++++++++++++++++++++++++
> include/uapi/drm/i915_drm.h | 55 ++++++++++++++++++++++++++++
> 2 files changed, 119 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/intel_query_info.c b/drivers/gpu/drm/i915/intel_query_info.c
> index 21434c393582..45c5b29e0988 100644
> --- a/drivers/gpu/drm/i915/intel_query_info.c
> +++ b/drivers/gpu/drm/i915/intel_query_info.c
> @@ -25,6 +25,68 @@
> #include "i915_drv.h"
> #include <uapi/drm/i915_drm.h>
>
> +static int query_info_rcs_topology(struct drm_i915_private *dev_priv,
> + struct drm_i915_query_info *args)
> +{
> + const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> + struct drm_i915_rcs_topology_info __user *user_topology =
> + u64_to_user_ptr(args->info_ptr);
> + struct drm_i915_rcs_topology_info topology;
> + u32 data_size, total_size;
> + const u8 *data = NULL;
> + int ret;
> +
> + /* Not supported on gen < 8. */
> + if (sseu->max_slices == 0)
> + return -ENODEV;
> +
> + switch (args->query_params[0]) {
> + case I915_RCS_TOPOLOGY_SLICE:
> + topology.params[0] = sseu->max_slices;
> + data_size = sizeof(sseu->slice_mask);
> + data = &sseu->slice_mask;
Regardless of the solution, you will need to translate the internal
kernel data into something strictly defined in the uAPI headers,
otherwise we leak internal organization into ABI.
> + break;
> +
> + case I915_RCS_TOPOLOGY_SUBSLICE:
> + topology.params[0] = sseu->max_slices;
> + topology.params[1] = ALIGN(sseu->max_subslices, 8) / 8;
> + data_size = sseu->max_slices * topology.params[1];
> + data = sseu->subslices_mask;
> + break;
> +
> + case I915_RCS_TOPOLOGY_EU:
> + topology.params[2] = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
> + topology.params[1] = sseu->max_subslices * topology.params[2];
> + topology.params[0] = sseu->max_slices;
> + data_size = sseu->max_slices * topology.params[1];
> + data = sseu->eu_mask;
> + break;
> +
> + default:
> + return -EINVAL;
> + }
> +
> + total_size = sizeof(topology) + data_size;
> +
> + if (args->info_ptr_len == 0) {
> + args->info_ptr_len = total_size;
> + return 0;
> + }
> +
> + if (args->info_ptr_len < total_size)
> + return -EINVAL;
> +
> + ret = copy_to_user(user_topology, &topology, sizeof(topology));
> + if (ret)
> + return -EFAULT;
> +
> + ret = copy_to_user(user_topology + 1, data, data_size);
> + if (ret)
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> static u8 user_class_map[I915_ENGINE_CLASS_MAX] = {
> [I915_ENGINE_CLASS_OTHER] = OTHER_CLASS,
> [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
> @@ -112,6 +174,8 @@ int i915_query_info_ioctl(struct drm_device *dev, void *data,
> switch (args->query) {
> case I915_QUERY_INFO_ENGINE:
> return query_info_engine(dev_priv, args);
> + case I915_QUERY_INFO_RCS_TOPOLOGY:
> + return query_info_rcs_topology(dev_priv, args);
> default:
> return -EINVAL;
> }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index c6026616300e..5b6a8995a948 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1569,6 +1569,61 @@ struct drm_i915_engine_info {
> __u8 rsvd2[6];
> };
>
> +/* Query RCS topology.
> + *
> + * drm_i915_query_info.query_params[0] should be set to one of the
> + * I915_RCS_TOPOLOGY_* define.
> + *
> + * drm_i915_gem_query_info.info_ptr will be written to with
> + * drm_i915_rcs_topology_info.
> + */
> +#define I915_QUERY_INFO_RCS_TOPOLOGY 1 /* version 1 */
> +
> +/* Query RCS slice topology
> + *
> + * The meaning of the drm_i915_rcs_topology_info fields is :
> + *
> + * params[0]: number of slices
> + *
> + * data: Each bit indicates whether a slice is available (1) or fused off (0).
> + * Formula to tell if slice X is available :
> + *
> + * (data[X / 8] >> (X % 8)) & 1
> + */
> +#define I915_RCS_TOPOLOGY_SLICE 0 /* version 1 */
> +/* Query RCS subslice topology
> + *
> + * The meaning of the drm_i915_rcs_topology_info fields is :
> + *
> + * params[0]: number of slices
> + * params[1]: slice stride
> + *
> + * data: each bit indicates whether a subslice is available (1) or fused off
> + * (0). Formula to tell if slice X subslice Y is available :
> + *
> + * (data[(X * params[1]) + Y / 8] >> (Y % 8)) & 1
> + */
> +#define I915_RCS_TOPOLOGY_SUBSLICE 1 /* version 1 */
> +/* Query RCS EU topology
> + *
> + * The meaning of the drm_i915_rcs_topology_info fields is :
> + *
> + * params[0]: number of slices
> + * params[1]: slice stride
> + * params[2]: subslice stride
> + *
> + * data: Each bit indicates whether a subslice is available (1) or fused off
> + * (0). Formula to tell if slice X subslice Y eu Z is available :
> + *
> + * (data[X * params[1] + Y * params[2] + Z / 8] >> (Z % 8)) & 1
> + */
> +#define I915_RCS_TOPOLOGY_EU 2 /* version 1 */
> +
> +struct drm_i915_rcs_topology_info {
> + __u32 params[6];
> +
> + __u8 data[];
You don't use this marker in the patch.
But in general would it be feasible to define and name the returned data
more precisely? Like:
struct drm_engine_rcs_engine_info {
...
/existing_stuff/
...
#define HAS_TOPOLOGY
u32 flags;
struct {
u32 this;
u32 that;
...
u8 mask[SOME_FUTURE_PROOF_NUMBER];
} slice_topology;
struct {
u32 this;
u32 that;
...
u8 mask[SOME_FUTURE_PROOF_NUMBER];
} subslice_topology;
struct {
u32 this;
u32 that;
...
u8 mask[SOME_FUTURE_PROOF_NUMBER];
} eu_topology;
};
That said, perhaps RCS topology belongs more to the GPU than the render
engine.
Regards,
Tvrtko
> +};
>
> struct drm_i915_query_info {
> /* in/out: Protocol version requested/supported. When set to 0, the
>
More information about the Intel-gfx
mailing list