[Intel-gfx] [PATCH 4/4] drm/i915: expose rcs topology through discovery uAPI
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Nov 10 16:37:33 UTC 2017
On 09/11/17 17:34, Tvrtko Ursulin wrote:
>
> On 08/11/2017 16:22, Lionel Landwerlin wrote:
>> With the introduction of asymetric slices in CNL, we cannot rely on
>> the previous SUBSLICE_MASK getparam. Here we introduce a more detailed
>> way of querying the Gen's GPU topology that doesn't aggregate numbers.
>>
>> This is essential for monitoring parts of the GPU with the OA unit,
>> because counters need to be normalized to the number of
>> EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
>> not gives us sufficient information.
>>
>> This change introduce a new way to query properties of the GPU, making
>> room for new queries (some media related topology could be exposed in
>> the future).
>>
>> As a bonus we can draw representations of the GPU :
>>
>> https://imgur.com/a/vuqpa
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>> drivers/gpu/drm/i915/intel_query_info.c | 64
>> +++++++++++++++++++++++++++++++++
>> include/uapi/drm/i915_drm.h | 55
>> ++++++++++++++++++++++++++++
>> 2 files changed, 119 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_query_info.c
>> b/drivers/gpu/drm/i915/intel_query_info.c
>> index 21434c393582..45c5b29e0988 100644
>> --- a/drivers/gpu/drm/i915/intel_query_info.c
>> +++ b/drivers/gpu/drm/i915/intel_query_info.c
>> @@ -25,6 +25,68 @@
>> #include "i915_drv.h"
>> #include <uapi/drm/i915_drm.h>
>> +static int query_info_rcs_topology(struct drm_i915_private *dev_priv,
>> + struct drm_i915_query_info *args)
>> +{
>> + const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
>> + struct drm_i915_rcs_topology_info __user *user_topology =
>> + u64_to_user_ptr(args->info_ptr);
>> + struct drm_i915_rcs_topology_info topology;
>> + u32 data_size, total_size;
>> + const u8 *data = NULL;
>> + int ret;
>> +
>> + /* Not supported on gen < 8. */
>> + if (sseu->max_slices == 0)
>> + return -ENODEV;
>> +
>> + switch (args->query_params[0]) {
>> + case I915_RCS_TOPOLOGY_SLICE:
>> + topology.params[0] = sseu->max_slices;
>> + data_size = sizeof(sseu->slice_mask);
>> + data = &sseu->slice_mask;
>
> Regardless of the solution, you will need to translate the internal
> kernel data into something strictly defined in the uAPI headers,
> otherwise we leak internal organization into ABI.
Sure, I'll add an assert to make sure that it is u8, so if it does
switch to u16/u32 at some point, people will remember to maintain the API.
>
>> + break;
>> +
>> + case I915_RCS_TOPOLOGY_SUBSLICE:
>> + topology.params[0] = sseu->max_slices;
>> + topology.params[1] = ALIGN(sseu->max_subslices, 8) / 8;
>> + data_size = sseu->max_slices * topology.params[1];
>> + data = sseu->subslices_mask;
>> + break;
>> +
>> + case I915_RCS_TOPOLOGY_EU:
>> + topology.params[2] = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
>> + topology.params[1] = sseu->max_subslices * topology.params[2];
>> + topology.params[0] = sseu->max_slices;
>> + data_size = sseu->max_slices * topology.params[1];
>> + data = sseu->eu_mask;
>> + break;
>> +
>> + default:
>> + return -EINVAL;
>> + }
>> +
>> + total_size = sizeof(topology) + data_size;
>> +
>> + if (args->info_ptr_len == 0) {
>> + args->info_ptr_len = total_size;
>> + return 0;
>> + }
>> +
>> + if (args->info_ptr_len < total_size)
>> + return -EINVAL;
>> +
>> + ret = copy_to_user(user_topology, &topology, sizeof(topology));
>> + if (ret)
>> + return -EFAULT;
>> +
>> + ret = copy_to_user(user_topology + 1, data, data_size);
>> + if (ret)
>> + return -EFAULT;
>> +
>> + return 0;
>> +}
>> +
>> static u8 user_class_map[I915_ENGINE_CLASS_MAX] = {
>> [I915_ENGINE_CLASS_OTHER] = OTHER_CLASS,
>> [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
>> @@ -112,6 +174,8 @@ int i915_query_info_ioctl(struct drm_device *dev,
>> void *data,
>> switch (args->query) {
>> case I915_QUERY_INFO_ENGINE:
>> return query_info_engine(dev_priv, args);
>> + case I915_QUERY_INFO_RCS_TOPOLOGY:
>> + return query_info_rcs_topology(dev_priv, args);
>> default:
>> return -EINVAL;
>> }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index c6026616300e..5b6a8995a948 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -1569,6 +1569,61 @@ struct drm_i915_engine_info {
>> __u8 rsvd2[6];
>> };
>> +/* Query RCS topology.
>> + *
>> + * drm_i915_query_info.query_params[0] should be set to one of the
>> + * I915_RCS_TOPOLOGY_* define.
>> + *
>> + * drm_i915_gem_query_info.info_ptr will be written to with
>> + * drm_i915_rcs_topology_info.
>> + */
>> +#define I915_QUERY_INFO_RCS_TOPOLOGY 1 /* version 1 */
>> +
>> +/* Query RCS slice topology
>> + *
>> + * The meaning of the drm_i915_rcs_topology_info fields is :
>> + *
>> + * params[0]: number of slices
>> + *
>> + * data: Each bit indicates whether a slice is available (1) or
>> fused off (0).
>> + * Formula to tell if slice X is available :
>> + *
>> + * (data[X / 8] >> (X % 8)) & 1
>> + */
>> +#define I915_RCS_TOPOLOGY_SLICE 0 /* version 1 */
>> +/* Query RCS subslice topology
>> + *
>> + * The meaning of the drm_i915_rcs_topology_info fields is :
>> + *
>> + * params[0]: number of slices
>> + * params[1]: slice stride
>> + *
>> + * data: each bit indicates whether a subslice is available (1) or
>> fused off
>> + * (0). Formula to tell if slice X subslice Y is available :
>> + *
>> + * (data[(X * params[1]) + Y / 8] >> (Y % 8)) & 1
>> + */
>> +#define I915_RCS_TOPOLOGY_SUBSLICE 1 /* version 1 */
>> +/* Query RCS EU topology
>> + *
>> + * The meaning of the drm_i915_rcs_topology_info fields is :
>> + *
>> + * params[0]: number of slices
>> + * params[1]: slice stride
>> + * params[2]: subslice stride
>> + *
>> + * data: Each bit indicates whether a subslice is available (1) or
>> fused off
>> + * (0). Formula to tell if slice X subslice Y eu Z is available :
>> + *
>> + * (data[X * params[1] + Y * params[2] + Z / 8] >> (Z % 8)) & 1
>> + */
>> +#define I915_RCS_TOPOLOGY_EU 2 /* version 1 */
>> +
>> +struct drm_i915_rcs_topology_info {
>> + __u32 params[6];
>> +
>> + __u8 data[];
>
> You don't use this marker in the patch.
>
> But in general would it be feasible to define and name the returned
> data more precisely? Like:
>
> struct drm_engine_rcs_engine_info {
> ...
> /existing_stuff/
> ...
>
> #define HAS_TOPOLOGY
> u32 flags;
>
> struct {
> u32 this;
> u32 that;
> ...
> u8 mask[SOME_FUTURE_PROOF_NUMBER];
> } slice_topology;
>
> struct {
> u32 this;
> u32 that;
> ...
> u8 mask[SOME_FUTURE_PROOF_NUMBER];
> } subslice_topology;
>
> struct {
> u32 this;
> u32 that;
> ...
> u8 mask[SOME_FUTURE_PROOF_NUMBER];
> } eu_topology;
> };
I'm pretty sure we'll need to expose more than these 3 properties here
(slice/subslice/eus) soon.
Some of the components residing in the subslice could be of interest.
So I'm reluctant to have all of this within a single struct which we
can't change the size of.
Having an enum for each property and possibly an associated struct for
each is fine though.
>
> That said, perhaps RCS topology belongs more to the GPU than the
> render engine.
>
> Regards,
>
> Tvrtko
>
>> +};
>> struct drm_i915_query_info {
>> /* in/out: Protocol version requested/supported. When set to 0,
>> the
>>
>
More information about the Intel-gfx
mailing list