[igt-dev] [PATCH i-g-t v5 26/30] lib/i915/perf: Enable multi-tile support for perf library
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Thu Mar 23 22:55:30 UTC 2023
Add class instance awareness to perf library and enable GPUvis to select
specific GT in perf recorder.
v2: Use gt 0 for gem_barrier_race test since the race is independent of
gt used. (Umesh)
v3: Use i915_drm_local.h in perf tools
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
---
lib/i915/perf.c | 15 ++++--
lib/i915/perf.h | 2 +-
tests/core_hotunplug.c | 2 +-
tests/i915/gem_barrier_race.c | 2 +-
tests/i915/perf.c | 2 +-
tools/i915-perf/i915_perf_configs.c | 2 +-
tools/i915-perf/i915_perf_recorder.c | 74 ++++++++++++++++++++++++++--
7 files changed, 84 insertions(+), 15 deletions(-)
diff --git a/lib/i915/perf.c b/lib/i915/perf.c
index 6c7a1925..d8624dc0 100644
--- a/lib/i915/perf.c
+++ b/lib/i915/perf.c
@@ -574,7 +574,7 @@ typedef enum {
RPS_MAX_ATTR,
} intel_sysfs_attr_id;
-static const char *intel_sysfs_attr_name[2][RPS_MAX_ATTR] =
+static const char *intel_sysfs_attr_name[][RPS_MAX_ATTR] =
{
{
"gt_min_freq_mhz",
@@ -584,20 +584,25 @@ static const char *intel_sysfs_attr_name[2][RPS_MAX_ATTR] =
"gt/gt0/rps_min_freq_mhz",
"gt/gt0/rps_max_freq_mhz",
},
+ {
+ "gt/gt1/rps_min_freq_mhz",
+ "gt/gt1/rps_max_freq_mhz",
+ },
};
static const char *
-intel_sysfs_attr_id_to_name(int sysfs_dirfd, intel_sysfs_attr_id id)
+intel_sysfs_attr_id_to_name(int sysfs_dirfd, intel_sysfs_attr_id id, int gt)
{
assert(id < RPS_MAX_ATTR);
+ assert(gt < sizeof(intel_sysfs_attr_name) - 1);
return !faccessat(sysfs_dirfd, "gt", O_RDONLY, 0) ?
- intel_sysfs_attr_name[1][id] :
+ intel_sysfs_attr_name[gt + 1][id] :
intel_sysfs_attr_name[0][id];
}
struct intel_perf *
-intel_perf_for_fd(int drm_fd)
+intel_perf_for_fd(int drm_fd, int gt)
{
uint32_t device_id;
uint32_t device_revision;
@@ -612,7 +617,7 @@ intel_perf_for_fd(int drm_fd)
return NULL;
#define read_sysfs_rps(fd, id, value) \
- read_sysfs(fd, intel_sysfs_attr_id_to_name(fd, id), value)
+ read_sysfs(fd, intel_sysfs_attr_id_to_name(fd, id, gt), value)
if (!read_sysfs_rps(sysfs_dir_fd, RPS_MIN_FREQ_MHZ, &gt_min_freq) ||
!read_sysfs_rps(sysfs_dir_fd, RPS_MAX_FREQ_MHZ, &gt_max_freq)) {
diff --git a/lib/i915/perf.h b/lib/i915/perf.h
index e6e60dc9..df5b6b96 100644
--- a/lib/i915/perf.h
+++ b/lib/i915/perf.h
@@ -316,7 +316,7 @@ intel_perf_devinfo_eu_available(const struct intel_perf_devinfo *devinfo,
return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
}
-struct intel_perf *intel_perf_for_fd(int drm_fd);
+struct intel_perf *intel_perf_for_fd(int drm_fd, int gt);
struct intel_perf *intel_perf_for_devinfo(uint32_t device_id,
uint32_t revision,
uint64_t timestamp_frequency,
diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c
index ebb646b5..e79eb1ed 100644
--- a/tests/core_hotunplug.c
+++ b/tests/core_hotunplug.c
@@ -377,7 +377,7 @@ static bool local_i915_perf_healthcheck(int i915)
{
struct intel_perf *intel_perf;
- intel_perf = intel_perf_for_fd(i915);
+ intel_perf = intel_perf_for_fd(i915, 0);
if (intel_perf)
intel_perf_free(intel_perf);
return intel_perf;
diff --git a/tests/i915/gem_barrier_race.c b/tests/i915/gem_barrier_race.c
index 053fa206..f446aab6 100644
--- a/tests/i915/gem_barrier_race.c
+++ b/tests/i915/gem_barrier_race.c
@@ -29,7 +29,7 @@ static void remote_request_workload(int fd, int *done)
* Based on code patterns found in tests/i915/perf.c
*/
struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
- struct intel_perf *intel_perf = intel_perf_for_fd(fd);
+ struct intel_perf *intel_perf = intel_perf_for_fd(fd, 0);
uint64_t properties[] = {
DRM_I915_PERF_PROP_SAMPLE_OA, true,
DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index ce58a6b8..9ca4d34b 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -1123,7 +1123,7 @@ init_sys_info(void)
{
igt_assert_neq(devid, 0);
- intel_perf = intel_perf_for_fd(drm_fd);
+ intel_perf = intel_perf_for_fd(drm_fd, 0);
igt_require(intel_perf);
igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
diff --git a/tools/i915-perf/i915_perf_configs.c b/tools/i915-perf/i915_perf_configs.c
index bce3bd0f..0db9a126 100644
--- a/tools/i915-perf/i915_perf_configs.c
+++ b/tools/i915-perf/i915_perf_configs.c
@@ -230,7 +230,7 @@ main(int argc, char *argv[])
fprintf(stdout, "Device graphics_ver=%i gt=%i\n", devinfo->graphics_ver, devinfo->gt);
- perf = intel_perf_for_fd(drm_fd);
+ perf = intel_perf_for_fd(drm_fd, 0);
if (!perf) {
fprintf(stderr, "No perf data found.\n");
return EXIT_FAILURE;
diff --git a/tools/i915-perf/i915_perf_recorder.c b/tools/i915-perf/i915_perf_recorder.c
index d16f1546..ca435483 100644
--- a/tools/i915-perf/i915_perf_recorder.c
+++ b/tools/i915-perf/i915_perf_recorder.c
@@ -26,6 +26,7 @@
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
+#include <limits.h>
#include <poll.h>
#include <signal.h>
#include <stdbool.h>
@@ -43,6 +44,8 @@
#include <i915_drm.h>
+#include "i915/i915_drm_local.h"
+
#include "igt_core.h"
#include "intel_chipset.h"
#include "i915/perf.h"
@@ -408,6 +411,9 @@ struct recording_context {
int command_fifo_fd;
uint64_t poll_period;
+
+ struct i915_engine_class_instance engine;
+ int gt;
};
static int
@@ -449,6 +455,13 @@ perf_open(struct recording_context *ctx)
properties[p++] = ctx->poll_period;
}
+ if (revision >= 6 && ctx->engine.engine_class >= 0 && ctx->engine.engine_instance >= 0) {
+ properties[p++] = DRM_I915_PERF_PROP_OA_ENGINE_CLASS;
+ properties[p++] = ctx->engine.engine_class;
+ properties[p++] = DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE;
+ properties[p++] = ctx->engine.engine_instance;
+ }
+
memset(&param, 0, sizeof(param));
param.flags = 0;
param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
@@ -497,8 +510,8 @@ write_header(FILE *output, struct recording_context *ctx)
.gt_min_frequency = ctx->perf->devinfo.gt_min_freq,
.gt_max_frequency = ctx->perf->devinfo.gt_max_freq,
.oa_format = ctx->metric_set->perf_oa_format,
- .engine_class = I915_ENGINE_CLASS_RENDER,
- .engine_instance = 0,
+ .engine_class = ctx->engine.engine_class,
+ .engine_instance = ctx->engine.engine_instance,
};
struct drm_i915_perf_record_header header = {
.type = INTEL_PERF_RECORD_TYPE_DEVICE_INFO,
@@ -805,7 +818,9 @@ usage(const char *name)
" Values: boot, mono, mono_raw (default = mono)\n"
" --poll-period -P <value> Polling interval in microseconds used by a timer in the driver to query\n"
" for OA reports periodically\n"
- " (default = 5000), Minimum = 100.\n",
+ " (default = 5000), Minimum = 100.\n"
+ " --engine-class -e <value> Engine class used for the OA capture.\n"
+ " --engine-instance -i <value> Engine instance used for the OA capture.\n",
name);
}
@@ -834,6 +849,33 @@ teardown_recording_context(struct recording_context *ctx)
close(ctx->drm_fd);
}
+static int
+mtl_engine_to_gt(const struct i915_engine_class_instance *engine)
+{
+ switch (engine->engine_class) {
+ case I915_ENGINE_CLASS_RENDER:
+ return 0;
+ case I915_ENGINE_CLASS_VIDEO:
+ case I915_ENGINE_CLASS_VIDEO_ENHANCE:
+ return 1;
+ default:
+ return -1;
+ }
+}
+
+/* static mapping as in igt core library until a different way is available */
+static int
+engine_to_gt(struct recording_context *ctx)
+{
+ if (ctx->devinfo->is_meteorlake)
+ return mtl_engine_to_gt(&ctx->engine);
+ else if (ctx->engine.engine_class == I915_ENGINE_CLASS_RENDER &&
+ ctx->engine.engine_instance == 0)
+ return 0;
+
+ return -1;
+}
+
int
main(int argc, char *argv[])
{
@@ -849,6 +891,8 @@ main(int argc, char *argv[])
{"command-fifo", required_argument, 0, 'f'},
{"cpu-clock", required_argument, 0, 'k'},
{"poll-period", required_argument, 0, 'P'},
+ {"engine-class", required_argument, 0, 'e'},
+ {"engine-instance", required_argument, 0, 'i'},
{0, 0, 0, 0}
};
const struct {
@@ -878,9 +922,10 @@ main(int argc, char *argv[])
/* 5 ms poll period */
.poll_period = 5 * 1000 * 1000,
+ .engine = { USHRT_MAX, USHRT_MAX },
};
- while ((opt = getopt_long(argc, argv, "hc:d:p:m:Co:s:f:k:P:", long_options, NULL)) != -1) {
+ while ((opt = getopt_long(argc, argv, "hc:d:p:m:Co:s:f:k:P:e:i:", long_options, NULL)) != -1) {
switch (opt) {
case 'h':
usage(argv[0]);
@@ -931,6 +976,12 @@ main(int argc, char *argv[])
case 'P':
ctx.poll_period = MAX(100, atol(optarg)) * 1000;
break;
+ case 'e':
+ ctx.engine.engine_class = atoi(optarg);
+ break;
+ case 'i':
+ ctx.engine.engine_instance = atoi(optarg);
+ break;
default:
fprintf(stderr, "Internal error: "
"unexpected getopt value: %d\n", opt);
@@ -944,6 +995,12 @@ main(int argc, char *argv[])
return EXIT_SUCCESS;
}
+ if (ctx.engine.engine_class == USHRT_MAX ||
+ ctx.engine.engine_instance == USHRT_MAX) {
+ ctx.engine.engine_class = I915_ENGINE_CLASS_RENDER;
+ ctx.engine.engine_instance = 0;
+ }
+
ctx.drm_fd = open_render_node(&ctx.devid, dev_node_id);
if (ctx.drm_fd < 0) {
fprintf(stderr, "Unable to open device.\n");
@@ -956,6 +1013,13 @@ main(int argc, char *argv[])
goto fail;
}
+ ctx.gt = engine_to_gt(&ctx);
+ if (ctx.gt < 0) {
+ fprintf(stderr, "Unsupported engine class:instance %d:%d.\n",
+ ctx.engine.engine_class, ctx.engine.engine_instance);
+ goto fail;
+ }
+
fprintf(stdout, "Device name=%s gen=%i gt=%i id=0x%x\n",
ctx.devinfo->codename, ctx.devinfo->graphics_ver, ctx.devinfo->gt, ctx.devid);
@@ -965,7 +1029,7 @@ main(int argc, char *argv[])
goto fail;
}
- ctx.perf = intel_perf_for_fd(ctx.drm_fd);
+ ctx.perf = intel_perf_for_fd(ctx.drm_fd, ctx.gt);
if (!ctx.perf) {
fprintf(stderr, "No perf data found.\n");
goto fail;
--
2.36.1
More information about the igt-dev
mailing list