[Intel-gfx] [PATCH v2 4/9] drm/i915/perf: Group engines into respective OA groups
Dixit, Ashutosh
ashutosh.dixit at intel.com
Wed Feb 22 21:52:23 UTC 2023
On Thu, 16 Feb 2023 16:58:45 -0800, Umesh Nerlige Ramappa wrote:
>
Hi Umesh,
> Now that we may have multiple OA units in a single GT as well as on
> separate GTs, create an engine group that maps to a single OA unit.
>
> v2: (Jani)
> - Drop warning on ENOMEM
> - Reorder patch in the series
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +
> drivers/gpu/drm/i915/gt/intel_sseu.c | 3 +-
> drivers/gpu/drm/i915/i915_perf.c | 124 +++++++++++++++++--
> drivers/gpu/drm/i915/i915_perf_types.h | 51 +++++++-
> 4 files changed, 169 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 4fd54fb8810f..8a8b0dce241b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -53,6 +53,8 @@ struct intel_gt;
> struct intel_ring;
> struct intel_uncore;
> struct intel_breadcrumbs;
> +struct intel_engine_cs;
> +struct i915_perf_group;
>
> typedef u32 intel_engine_mask_t;
> #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
> @@ -603,6 +605,8 @@ struct intel_engine_cs {
> } props, defaults;
>
> I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
> +
> + struct i915_perf_group *oa_group;
I think 'struct i915_oa_unit' is a better name (since it suggests a HW
entity), but since if we change we'll need to change everywhere so leave as
is with a comment to the effect that:
1 OA unit <-> 1 OA buffer <-> 1 perf group
> };
>
> static inline bool
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> index 6c6198a257ac..1141f875f5bd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
> @@ -6,6 +6,7 @@
> #include <linux/string_helpers.h>
>
> #include "i915_drv.h"
> +#include "i915_perf_types.h"
> #include "intel_engine_regs.h"
> #include "intel_gt_regs.h"
> #include "intel_sseu.h"
> @@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
> * If i915/perf is active, we want a stable powergating configuration
> * on the system. Use the configuration pinned by i915/perf.
> */
> - if (gt->perf.exclusive_stream)
> + if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
I haven't looked into what this function does, hopefully ok to do this only
for OAG?
> req_sseu = >->perf.sseu;
>
> slices = hweight8(req_sseu->slice_mask);
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 1229f65534e2..37c4cc44d68c 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1584,8 +1584,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> {
> struct i915_perf *perf = stream->perf;
> struct intel_gt *gt = stream->engine->gt;
> + struct i915_perf_group *g = stream->engine->oa_group;
>
> - if (WARN_ON(stream != gt->perf.exclusive_stream))
> + if (WARN_ON(stream != g->exclusive_stream))
> return;
>
> /*
> @@ -1594,7 +1595,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> *
> * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
> */
> - WRITE_ONCE(gt->perf.exclusive_stream, NULL);
> + WRITE_ONCE(g->exclusive_stream, NULL);
> perf->ops.disable_metric_set(stream);
>
> free_oa_buffer(stream);
> @@ -3192,6 +3193,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> {
> struct drm_i915_private *i915 = stream->perf->i915;
> struct i915_perf *perf = stream->perf;
> + struct i915_perf_group *g;
> struct intel_gt *gt;
> int ret;
>
> @@ -3202,6 +3204,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> }
> gt = props->engine->gt;
>
> + g = props->engine->oa_group;
> + if (!g) {
> + DRM_DEBUG("Perf group invalid\n");
> + return -EINVAL;
> + }
This check should be moved to the engine_supports_oa check in
read_properties_unlocked in "drm/i915/perf: Add engine class instance
parameters to perf". It basically duplicates that check I think.
Or rather, engine_supports_oa check should be now be re-written as follows
I think:
static bool engine_supports_oa(const struct intel_engine_cs *engine)
{
return engine->oa_group;
}
Since there are many more instances of engine_supports_oa calls.
If we do this in read_properties_unlocked we don't need the above check
here.
> +
> /*
> * If the sysfs metrics/ directory wasn't registered for some
> * reason then don't let userspace try their luck with config
> @@ -3231,7 +3239,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> * counter reports and marshal to the appropriate client
> * we currently only allow exclusive access
> */
> - if (gt->perf.exclusive_stream) {
> + if (g->exclusive_stream) {
> drm_dbg(&stream->perf->i915->drm,
> "OA unit already in use\n");
> return -EBUSY;
> @@ -3326,7 +3334,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> stream->ops = &i915_oa_stream_ops;
>
> stream->engine->gt->perf.sseu = props->sseu;
> - WRITE_ONCE(gt->perf.exclusive_stream, stream);
> + WRITE_ONCE(g->exclusive_stream, stream);
>
> ret = i915_perf_stream_enable_sync(stream);
> if (ret) {
> @@ -3349,7 +3357,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> return 0;
>
> err_enable:
> - WRITE_ONCE(gt->perf.exclusive_stream, NULL);
> + WRITE_ONCE(g->exclusive_stream, NULL);
> perf->ops.disable_metric_set(stream);
>
> free_oa_buffer(stream);
> @@ -3378,12 +3386,13 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
> const struct intel_engine_cs *engine)
> {
> struct i915_perf_stream *stream;
> + struct i915_perf_group *g = engine->oa_group;
>
> - if (!engine_supports_oa(engine))
> + if (!g)
> return;
>
> /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
> - stream = READ_ONCE(engine->gt->perf.exclusive_stream);
> + stream = READ_ONCE(g->exclusive_stream);
> if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
> gen8_update_reg_state_unlocked(ce, stream);
> }
> @@ -4753,6 +4762,95 @@ static struct ctl_table oa_table[] = {
> {}
> };
>
> +static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
> +{
> + enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
> +
> + switch (platform) {
> + default:
> + return 1;
> + }
I think in this function let us just say 'return 1' since we have not
introduced a value other than 1 in this series, so no need for the switch
statement I think.
> +}
> +
> +static u32 __oa_engine_group(struct intel_engine_cs *engine)
> +{
> + if (!engine_supports_oa(engine))
> + return PERF_GROUP_INVALID;
> +
> + switch (engine->class) {
> + case RENDER_CLASS:
> + return PERF_GROUP_OAG;
> +
> + default:
> + return PERF_GROUP_INVALID;
> + }
> +}
> +
> +static void oa_init_groups(struct intel_gt *gt)
> +{
> + int i, num_groups = gt->perf.num_perf_groups;
> + struct i915_perf *perf = >->i915->perf;
> +
> + for (i = 0; i < num_groups; i++) {
> + struct i915_perf_group *g = >->perf.group[i];
> +
> + /* Fused off engines can result in a group with num_engines == 0 */
> + if (g->num_engines == 0)
> + continue;
> +
> + /* Set oa_unit_ids now to ensure ids remain contiguous. */
> + g->oa_unit_id = perf->oa_unit_ids++;
> +
> + g->gt = gt;
> + }
> +}
> +
> +static int oa_init_gt(struct intel_gt *gt)
> +{
> + u32 num_groups = __num_perf_groups_per_gt(gt);
> + struct intel_engine_cs *engine;
> + struct i915_perf_group *g;
> + intel_engine_mask_t tmp;
> +
> + g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
> + if (!g)
> + return -ENOMEM;
> +
> + for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
> + u32 index;
> +
> + index = __oa_engine_group(engine);
> + if (index < num_groups) {
> + g[index].engine_mask |= BIT(engine->id);
> + g[index].num_engines++;
> + engine->oa_group = &g[index];
> + } else {
> + engine->oa_group = NULL;
> + }
We can avoid the else by initializing engine->oa_group to NULL at the start
of the for_each_engine_masked loop.
> + }
> +
> + gt->perf.num_perf_groups = num_groups;
> + gt->perf.group = g;
> +
> + oa_init_groups(gt);
> +
> + return 0;
> +}
> +
> +static int oa_init_engine_groups(struct i915_perf *perf)
> +{
> + struct intel_gt *gt;
> + int i, ret;
> +
> + for_each_gt(gt, perf->i915, i) {
> + ret = oa_init_gt(gt);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> static void oa_init_supported_formats(struct i915_perf *perf)
> {
> struct drm_i915_private *i915 = perf->i915;
> @@ -4919,7 +5017,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>
> if (perf->ops.enable_metric_set) {
> struct intel_gt *gt;
> - int i;
> + int i, ret;
>
> for_each_gt(gt, i915, i)
> mutex_init(>->perf.lock);
> @@ -4958,6 +5056,11 @@ void i915_perf_init(struct drm_i915_private *i915)
>
> perf->i915 = i915;
>
> + ret = oa_init_engine_groups(perf);
> + if (ret)
> + drm_err(&i915->drm,
> + "OA initialization failed %d\n", ret);
> +
> oa_init_supported_formats(perf);
> }
> }
> @@ -4986,10 +5089,15 @@ void i915_perf_sysctl_unregister(void)
> void i915_perf_fini(struct drm_i915_private *i915)
> {
> struct i915_perf *perf = &i915->perf;
> + struct intel_gt *gt;
> + int i;
>
> if (!perf->i915)
> return;
>
> + for_each_gt(gt, perf->i915, i)
> + kfree(gt->perf.group);
> +
> idr_for_each(&perf->metrics_idr, destroy_config, perf);
> idr_destroy(&perf->metrics_idr);
>
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index e36f046fe2b6..ce99551ad0fd 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -17,6 +17,7 @@
> #include <linux/wait.h>
> #include <uapi/drm/i915_drm.h>
>
> +#include "gt/intel_engine_types.h"
> #include "gt/intel_sseu.h"
> #include "i915_reg_defs.h"
> #include "intel_wakeref.h"
> @@ -30,6 +31,13 @@ struct i915_vma;
> struct intel_context;
> struct intel_engine_cs;
>
For below 'assigned but not used comments' I am basing this on this patch
and the last OAM patch 9/9.
> +enum {
> + PERF_GROUP_OAG = 0,
> +
> + PERF_GROUP_MAX,
> + PERF_GROUP_INVALID = U32_MAX,
> +};
> +
> struct i915_oa_format {
> u32 format;
> int size;
> @@ -390,6 +398,35 @@ struct i915_oa_ops {
> u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
> };
>
> +struct i915_perf_group {
> + /*
> + * @type: Identifier for the OA unit.
> + */
> + u32 oa_unit_id;
Assigned but not used, should be removed and introduced when needed.
> +
> + /*
> + * @gt: gt that this group belongs to
> + */
> + struct intel_gt *gt;
Not used either, suggest removing.
> +
> + /*
> + * @exclusive_stream: The stream currently using the OA unit. This is
> + * sometimes accessed outside a syscall associated to its file
> + * descriptor.
> + */
> + struct i915_perf_stream *exclusive_stream;
> +
> + /*
> + * @num_engines: The number of engines using this OA buffer.
s/OA buffer/OA unit/
> + */
> + u32 num_engines;
> +
> + /*
> + * @engine_mask: A mask of engines using a single OA buffer.
s/OA buffer/OA unit/
> + */
> + intel_engine_mask_t engine_mask;
Assigned but not used, should be removed and introduced when needed.
> +};
> +
> struct i915_perf_gt {
> /*
> * Lock associated with anything below within this structure.
> @@ -402,12 +439,15 @@ struct i915_perf_gt {
> */
> struct intel_sseu sseu;
>
> + /**
> + * @num_perf_groups: number of perf groups per gt.
> + */
> + u32 num_perf_groups;
This is 1 in this series so you could argue not needed but I think we know
some future platforms where there might be > 1 so I think we can leave it
in.
> +
> /*
> - * @exclusive_stream: The stream currently using the OA unit. This is
> - * sometimes accessed outside a syscall associated to its file
> - * descriptor.
> + * @group: list of OA groups - one for each OA buffer.
> */
> - struct i915_perf_stream *exclusive_stream;
> + struct i915_perf_group *group;
> };
>
> struct i915_perf {
> @@ -461,6 +501,9 @@ struct i915_perf {
> unsigned long format_mask[FORMAT_MASK_SIZE];
>
> atomic64_t noa_programming_delay;
> +
> + /* oa unit ids */
> + u32 oa_unit_ids;
Assigned but not used, should be removed, because oa_unit_id is
unused. Also if we need to do this later maybe idr is a better approach?
Also, if we remove the above members as suggested, oa_init_groups will
probably need to move to the last patch 9.
> };
>
> #endif /* _I915_PERF_TYPES_H_ */
> --
> 2.36.1
>
Thanks.
--
Ashutosh
More information about the Intel-gfx
mailing list