[Intel-gfx] [PATCH i-g-t v2 2/2] intel_gpu_overlay: Update for class:instance engine tracepoints
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jun 6 11:01:39 UTC 2018
On 06/06/2018 11:29, Lionel Landwerlin wrote:
> On 06/06/18 10:02, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> A minimal hack to parse the new tracepoint format and invent new "ring
>> ids" based on engine class and instance.
>>
>> v2:
>> * Make it a bit more future proof. (Lionel, Chris)
>> * Some assorted fixups to show forgotten engines.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>> overlay/gpu-perf.c | 40 ++++++++++++++++++++++++++++++++--------
>> overlay/overlay.c | 17 +++++++++--------
>> 2 files changed, 41 insertions(+), 16 deletions(-)
>>
>> diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c
>> index ea3480050ab9..5629f826765e 100644
>> --- a/overlay/gpu-perf.c
>> +++ b/overlay/gpu-perf.c
>> @@ -33,6 +33,7 @@
>> #include <string.h>
>> #include <fcntl.h>
>> #include <errno.h>
>> +#include <assert.h>
>> #include "igt_perf.h"
>> @@ -85,7 +86,8 @@ struct tracepoint {
>> int device_field;
>> int ctx_field;
>> - int ring_field;
>> + int class_field;
>> + int instance_field;
>> int seqno_field;
>> int global_seqno_field;
>> int plane_field;
>> @@ -151,8 +153,10 @@ tracepoint_id(int tp_id)
>> tp->device_field = f;
>> } else if (!strcmp(tp->fields[f].name, "ctx")) {
>> tp->ctx_field = f;
>> - } else if (!strcmp(tp->fields[f].name, "ring")) {
>> - tp->ring_field = f;
>> + } else if (!strcmp(tp->fields[f].name, "class")) {
>> + tp->class_field = f;
>> + } else if (!strcmp(tp->fields[f].name, "instance")) {
>> + tp->instance_field = f;
>> } else if (!strcmp(tp->fields[f].name, "seqno")) {
>> tp->seqno_field = f;
>> } else if (!strcmp(tp->fields[f].name, "global_seqno")) {
>> @@ -175,6 +179,26 @@ tracepoint_id(int tp_id)
>> tracepoints[tp_id].fields[ \
>> tracepoints[tp_id].field_name##_field].offset))
>> +#define READ_TP_FIELD_U16(sample, tp_id, field_name) \
>> + (*(const uint16_t *)((sample)->tracepoint_data + \
>> + tracepoints[tp_id].fields[ \
>> + tracepoints[tp_id].field_name##_field].offset))
>> +
>> +#define GET_RING_ID(sample, tp_id) \
>> +({ \
>> + unsigned char class, instance, ring; \
>> +\
>> + class = READ_TP_FIELD_U16(sample, tp_id, class); \
>> + instance = READ_TP_FIELD_U16(sample, tp_id, instance); \
>> +\
>> + assert(class <= I915_ENGINE_CLASS_VIDEO_ENHANCE); \
>> + assert(instance <= 4); \
>> +\
>> + ring = class * 4 + instance; \
>> +\
>> + ring; \
>> +})
>> +
>> static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id,
>> int (*func)(struct gpu_perf *, const void *))
>> {
>> @@ -313,7 +337,7 @@ static int request_add(struct gpu_perf *gp, const
>> void *event)
>> if (comm == NULL)
>> return 0;
>> - comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD,
>> ring)]++;
>> + comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++;
>> return 1;
>> }
>> @@ -329,7 +353,7 @@ static int ctx_switch(struct gpu_perf *gp, const
>> void *event)
>> {
>> const struct sample_event *sample = event;
>> - gp->ctx_switch[READ_TP_FIELD_U32(sample,
>> TP_GEM_RING_SWITCH_CONTEXT, ring)]++;
>> + gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++;
>> return 1;
>> }
>> @@ -367,8 +391,8 @@ static int wait_begin(struct gpu_perf *gp, const
>> void *event)
>> wait->context = READ_TP_FIELD_U32(sample,
>> TP_GEM_REQUEST_WAIT_BEGIN, ctx);
>> wait->seqno = READ_TP_FIELD_U32(sample,
>> TP_GEM_REQUEST_WAIT_BEGIN, seqno);
>> wait->time = sample->time;
>> - wait->next = gp->wait[READ_TP_FIELD_U32(sample,
>> TP_GEM_REQUEST_WAIT_BEGIN, ring)];
>> - gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN,
>> ring)] = wait;
>> + wait->next = gp->wait[GET_RING_ID(sample,
>> TP_GEM_REQUEST_WAIT_BEGIN)];
>> + gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait;
>> return 0;
>> }
>> @@ -377,7 +401,7 @@ static int wait_end(struct gpu_perf *gp, const
>> void *event)
>> {
>> const struct sample_event *sample = event;
>> struct gpu_perf_time *wait, **prev;
>> - uint32_t engine = READ_TP_FIELD_U32(sample,
>> TP_GEM_REQUEST_WAIT_END, ring);
>> + uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END);
>> uint32_t context = READ_TP_FIELD_U32(sample,
>> TP_GEM_REQUEST_WAIT_END, ctx);
>> uint32_t seqno = READ_TP_FIELD_U32(sample,
>> TP_GEM_REQUEST_WAIT_END, seqno);
>> diff --git a/overlay/overlay.c b/overlay/overlay.c
>> index 545af7bcb2f5..eae5ddfa8823 100644
>> --- a/overlay/overlay.c
>> +++ b/overlay/overlay.c
>> @@ -148,6 +148,7 @@ static void init_gpu_top(struct overlay_context *ctx,
>> { 0.25, 1, 0.25, 1 },
>> { 0.25, 0.25, 1, 1 },
>> { 1, 1, 1, 1 },
>> + { 1, 1, 0.25, 1 },
>> };
>> int n;
>> @@ -311,11 +312,11 @@ static void show_gpu_perf(struct overlay_context
>> *ctx, struct overlay_gpu_perf *
>> { 1, 1, 1, 1 },
>> };
>> struct gpu_perf_comm *comm, **prev;
>> - const char *ring_name[] = {
>> - "R",
>> - "B",
>> - "V0",
>> - "V1",
>> + const char *ring_name[MAX_RINGS] = {
>> + "R", "?", "?", "?",
>> + "B", "?", "?", "?",
>> + "V0", "V1", "?", "?",
>> + "VE0", "?", "?", "?",
>> };
>
> I guess if you defined a MAX_ENGINE_INSTANCES somewhere, you could just
> have an array of class names and then generate ring_name off the n
> variable further below.
>
> class = n / MAX_ENGINE_INSTANCES
> instance = n % MAX_ENGINE_INSTANCES
>
> snprintf(ring_name, sizeof(ring_name), "%s%i", class_names[class],
> instance);
>
>
> Just a suggestion; regardless:
>
> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Thanks! The above is a completely reasonable suggestion, but I have pushed
this as-is for now just so the tool doesn't segfault. I suspect there
are more issues lurking in there if it is to be made really future-proof.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list