[PATCH 02/21] drm/etnaviv: add uapi for perfmon feature

Christian Gmeiner christian.gmeiner at gmail.com
Sun Jul 2 10:04:28 UTC 2017


2017-06-26 14:56 GMT+02:00 Lucas Stach <l.stach at pengutronix.de>:
> Am Freitag, den 09.06.2017, 12:21 +0200 schrieb Christian Gmeiner:
>> Sadly we can not read any registers via command stream so we need
>> to extend the drm_etnaviv_gem_submit struct with performance monitor
>> requests. Those requests gets process before and/or after the actual
>> submitted command stream.
>>
>> The Vivante kernel driver has a special ioctl to read all perfmon
>> registers at once and return it. There is no connection between
>> a specifc command stream and the read perf values.
>>
>> Signed-off-by: Christian Gmeiner <christian.gmeiner at gmail.com>
>> ---
>>  include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++
>>  1 file changed, 16 insertions(+)
>>
>> diff --git a/include/uapi/drm/etnaviv_drm.h
>> b/include/uapi/drm/etnaviv_drm.h
>> index 9e488a0..b67a7dd 100644
>> --- a/include/uapi/drm/etnaviv_drm.h
>> +++ b/include/uapi/drm/etnaviv_drm.h
>> @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo {
>>       __u64 presumed;       /* in/out, presumed buffer address */
>>  };
>>
>> +/* performance monitor request (pmr) */
>> +#define ETNA_PM_PROCESS_PRE             0x0001
>> +#define ETNA_PM_PROCESS_POST            0x0002
>> +struct drm_etnaviv_gem_submit_pmr {
>> +     __u32 flags;          /* in, when to process request
>> (ETNA_PM_PROCESS_x) */
>> +     __u8  domain;         /* in, pm domain */
>> +     __u8  signal;         /* in, pm signal */
>
> Are we sure that no domain will ever have more than 256 signals?
>

My crystal ball is not working as expected so I don't know - but I will
change it to a __u16.

>> +     __u16 pad[3];
>
> Unnecessary large pad. A single u16 is enough to naturally align the
> next member.
>

Correct.

>> +     __u32 sequence;       /* in, sequence number */
>> +     __u32 read_offset;    /* in, offset from read_bo */
>> +     __u32 read_idx;       /* in, index of read_bo buffer */
>> +};
>> +
>>  /* Each cmdstream submit consists of a table of buffers involved,
>> and
>>   * one or more cmdstream buffers.  This allows for conditional
>> execution
>>   * (context-restore), and IB buffers needed for per tile/bin draw
>> cmds.
>> @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit {
>>       __u64 stream;         /* in, ptr to cmdstream */
>>       __u32 flags;          /* in, mask of ETNA_SUBMIT_x */
>>       __s32 fence_fd;       /* in/out, fence fd (see
>> ETNA_SUBMIT_FENCE_FD_x) */
>> +     __u64 pmrs;           /* in, ptr to array of submit_pmr's */
>> +     __u32 nr_pmrs;        /* in, number of submit_pmr's */
>> +     __u32 pad;
>
> This pad isn't needed.
>

 * Pad the entire struct to a multiple of 64-bits if the structure contains
   64-bit types - the structure size will otherwise differ on 32-bit versus
   64-bit. Having a different structure size hurts when passing arrays of
   structures to the kernel, or if the kernel checks the structure size, which
   e.g. the drm core does.

$ pahole drivers/gpu/drm/etnaviv/etnaviv.o -C drm_etnaviv_gem_submit
struct drm_etnaviv_gem_submit {
        __u32                      fence;                /*     0     4 */
        __u32                      pipe;                 /*     4     4 */
        __u32                      exec_state;           /*     8     4 */
        __u32                      nr_bos;               /*    12     4 */
        __u32                      nr_relocs;            /*    16     4 */
        __u32                      stream_size;          /*    20     4 */
        __u64                      bos;                  /*    24     8 */
        __u64                      relocs;               /*    32     8 */
        __u64                      stream;               /*    40     8 */
        __u32                      flags;                /*    48     4 */
        __s32                      fence_fd;             /*    52     4 */
        __u64                      pmrs;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        __u32                      nr_pmrs;              /*    64     4 */
        __u32                      pad;                  /*    68     4 */

        /* size: 72, cachelines: 2, members: 14 */
        /* last cacheline: 8 bytes */
};


>>  };
>>
>>  /* The normal way to synchronize with the GPU is just to CPU_PREP on

greets
--
Christian Gmeiner, MSc

https://www.youtube.com/user/AloryOFFICIAL
https://soundcloud.com/christian-gmeiner


More information about the dri-devel mailing list