[Intel-gfx] [PATCH i-g-t v2 2/2] intel_gpu_overlay: Update for class:instance engine tracepoints
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Wed Jun 6 10:29:02 UTC 2018
On 06/06/18 10:02, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> A minimal hack to parse the new tracepoint format and invent new "ring
> id's" based on engine class and instance.
>
> v2:
> * Make it a bit more future proof. (Lionel, Chris)
> * Some assorted fixups to show forgotten engines.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> overlay/gpu-perf.c | 40 ++++++++++++++++++++++++++++++++--------
> overlay/overlay.c | 17 +++++++++--------
> 2 files changed, 41 insertions(+), 16 deletions(-)
>
> diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c
> index ea3480050ab9..5629f826765e 100644
> --- a/overlay/gpu-perf.c
> +++ b/overlay/gpu-perf.c
> @@ -33,6 +33,7 @@
> #include <string.h>
> #include <fcntl.h>
> #include <errno.h>
> +#include <assert.h>
>
> #include "igt_perf.h"
>
> @@ -85,7 +86,8 @@ struct tracepoint {
>
> int device_field;
> int ctx_field;
> - int ring_field;
> + int class_field;
> + int instance_field;
> int seqno_field;
> int global_seqno_field;
> int plane_field;
> @@ -151,8 +153,10 @@ tracepoint_id(int tp_id)
> tp->device_field = f;
> } else if (!strcmp(tp->fields[f].name, "ctx")) {
> tp->ctx_field = f;
> - } else if (!strcmp(tp->fields[f].name, "ring")) {
> - tp->ring_field = f;
> + } else if (!strcmp(tp->fields[f].name, "class")) {
> + tp->class_field = f;
> + } else if (!strcmp(tp->fields[f].name, "instance")) {
> + tp->instance_field = f;
> } else if (!strcmp(tp->fields[f].name, "seqno")) {
> tp->seqno_field = f;
> } else if (!strcmp(tp->fields[f].name, "global_seqno")) {
> @@ -175,6 +179,26 @@ tracepoint_id(int tp_id)
> tracepoints[tp_id].fields[ \
> tracepoints[tp_id].field_name##_field].offset))
>
> +#define READ_TP_FIELD_U16(sample, tp_id, field_name) \
> + (*(const uint16_t *)((sample)->tracepoint_data + \
> + tracepoints[tp_id].fields[ \
> + tracepoints[tp_id].field_name##_field].offset))
> +
> +#define GET_RING_ID(sample, tp_id) \
> +({ \
> + unsigned char class, instance, ring; \
> +\
> + class = READ_TP_FIELD_U16(sample, tp_id, class); \
> + instance = READ_TP_FIELD_U16(sample, tp_id, instance); \
> +\
> + assert(class <= I915_ENGINE_CLASS_VIDEO_ENHANCE); \
> + assert(instance <= 4); \
> +\
> + ring = class * 4 + instance; \
> +\
> + ring; \
> +})
> +
> static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id,
> int (*func)(struct gpu_perf *, const void *))
> {
> @@ -313,7 +337,7 @@ static int request_add(struct gpu_perf *gp, const void *event)
> if (comm == NULL)
> return 0;
>
> - comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD, ring)]++;
> + comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++;
> return 1;
> }
>
> @@ -329,7 +353,7 @@ static int ctx_switch(struct gpu_perf *gp, const void *event)
> {
> const struct sample_event *sample = event;
>
> - gp->ctx_switch[READ_TP_FIELD_U32(sample, TP_GEM_RING_SWITCH_CONTEXT, ring)]++;
> + gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++;
> return 1;
> }
>
> @@ -367,8 +391,8 @@ static int wait_begin(struct gpu_perf *gp, const void *event)
> wait->context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ctx);
> wait->seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, seqno);
> wait->time = sample->time;
> - wait->next = gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)];
> - gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)] = wait;
> + wait->next = gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)];
> + gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait;
>
> return 0;
> }
> @@ -377,7 +401,7 @@ static int wait_end(struct gpu_perf *gp, const void *event)
> {
> const struct sample_event *sample = event;
> struct gpu_perf_time *wait, **prev;
> - uint32_t engine = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ring);
> + uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END);
> uint32_t context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ctx);
> uint32_t seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, seqno);
>
> diff --git a/overlay/overlay.c b/overlay/overlay.c
> index 545af7bcb2f5..eae5ddfa8823 100644
> --- a/overlay/overlay.c
> +++ b/overlay/overlay.c
> @@ -148,6 +148,7 @@ static void init_gpu_top(struct overlay_context *ctx,
> { 0.25, 1, 0.25, 1 },
> { 0.25, 0.25, 1, 1 },
> { 1, 1, 1, 1 },
> + { 1, 1, 0.25, 1 },
> };
> int n;
>
> @@ -311,11 +312,11 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
> { 1, 1, 1, 1 },
> };
> struct gpu_perf_comm *comm, **prev;
> - const char *ring_name[] = {
> - "R",
> - "B",
> - "V0",
> - "V1",
> + const char *ring_name[MAX_RINGS] = {
> + "R", "?", "?", "?",
> + "B", "?", "?", "?",
> + "V0", "V1", "?", "?",
> + "VE0", "?", "?", "?",
> };
I guess if you defined a MAX_ENGINE_INSTANCES constant somewhere, you could
just have an array of class names and then generate each ring name from the
n variable in the loop further below:
class = n / MAX_ENGINE_INSTANCES
instance = n % MAX_ENGINE_INSTANCES
snprintf(ring_name, sizeof(ring_name), "%s%i", class_names[class],
instance);
Just a suggestion; regardless:
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> double range[2];
> char buf[1024];
> @@ -326,7 +327,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
>
> gpu_perf_update(&gp->gpu_perf);
>
> - for (n = 0; n < 4; n++) {
> + for (n = 0; n < MAX_RINGS; n++) {
> if (gp->gpu_perf.ctx_switch[n])
> has_ctx = n + 1;
> if (gp->gpu_perf.flip_complete[n])
> @@ -389,7 +390,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
> }
>
> total = 0;
> - for (n = 0; n < 3; n++)
> + for (n = 0; n < MAX_RINGS; n++)
> total += comm->nr_requests[n];
> chart_add_sample(comm->user_data, total);
> }
> @@ -433,7 +434,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
> goto skip_comm;
>
> len = sprintf(buf, "%s:", comm->name);
> - for (n = 0; n < sizeof(ring_name)/sizeof(ring_name[0]); n++) {
> + for (n = 0; n < MAX_RINGS; n++) {
> if (comm->nr_requests[n] == 0)
> continue;
> len += sprintf(buf + len, "%s %d%s", need_comma ? "," : "", comm->nr_requests[n], ring_name[n]);
More information about the Intel-gfx
mailing list