[PATCH RFC 003/111] staging: etnaviv: add drm driver

Tue Apr 7 02:20:10 PDT 2015

Am Dienstag, den 07.04.2015, 11:04 +0200 schrieb Christian Gmeiner:
> Hi Lucas
> 
> 2015-04-07 9:35 GMT+02:00 Lucas Stach <l.stach at pengutronix.de>:
> > Am Sonntag, den 05.04.2015, 21:26 +0200 schrieb Christian Gmeiner:
> >> 2015-04-02 17:29 GMT+02:00 Lucas Stach <l.stach at pengutronix.de>:
> >> > From: Christian Gmeiner <christian.gmeiner at gmail.com>
> >> >
> >> > This is a consolidation by Russell King of Christian's drm work.
> >> >
> >> > Signed-off-by: Christian Gmeiner <christian.gmeiner at gmail.com>
> >> > Signed-off-by: Russell King <rmk+kernel at arm.linux.org.uk>
> >
> >> > ---
> > [...]
> >
> >> > +#endif /* STATE_HI_XML */
> >> > diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
> >> > new file mode 100644
> >> > index 000000000000..f7b5ac6f3842
> >> > --- /dev/null
> >> > +++ b/include/uapi/drm/etnaviv_drm.h
> >> > @@ -0,0 +1,225 @@
> >> > +/*
> >> > + * Copyright (C) 2013 Red Hat
> >> > + * Author: Rob Clark <robdclark at gmail.com>
> >> > + *
> >> > + * This program is free software; you can redistribute it and/or modify it
> >> > + * under the terms of the GNU General Public License version 2 as published by
> >> > + * the Free Software Foundation.
> >> > + *
> >> > + * This program is distributed in the hope that it will be useful, but WITHOUT
> >> > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> >> > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> >> > + * more details.
> >> > + *
> >> > + * You should have received a copy of the GNU General Public License along with
> >> > + * this program.  If not, see <http://www.gnu.org/licenses/>.
> >> > + */
> >> > +
> >> > +#ifndef __ETNAVIV_DRM_H__
> >> > +#define __ETNAVIV_DRM_H__
> >> > +
> >> > +#include <stddef.h>
> >> > +#include <drm/drm.h>
> >> > +
> >> > +/* Please note that modifications to all structs defined here are
> >> > + * subject to backwards-compatibility constraints:
> >> > + *  1) Do not use pointers, use uint64_t instead for 32 bit / 64 bit
> >> > + *     user/kernel compatibility
> >> > + *  2) Keep fields aligned to their size
> >> > + *  3) Because of how drm_ioctl() works, we can add new fields at
> >> > + *     the end of an ioctl if some care is taken: drm_ioctl() will
> >> > + *     zero out the new fields at the tail of the ioctl, so a zero
> >> > + *     value should have a backwards compatible meaning.  And for
> >> > + *     output params, userspace won't see the newly added output
> >> > + *     fields.. so that has to be somehow ok.
> >> > + */
> >> > +
> >> > +#define ETNA_PIPE_3D      0x00
> >> > +#define ETNA_PIPE_2D      0x01
> >> > +#define ETNA_PIPE_VG      0x02
> >> > +
> >> > +#define ETNA_MAX_PIPES    3
> >> > +
> >> > +/* timeouts are specified in clock-monotonic absolute times (to simplify
> >> > + * restarting interrupted ioctls).  The following struct is logically the
> >> > + * same as 'struct timespec' but 32/64b ABI safe.
> >> > + */
> >> > +struct drm_etnaviv_timespec {
> >> > +       int64_t tv_sec;          /* seconds */
> >> > +       int64_t tv_nsec;         /* nanoseconds */
> >> > +};
> >> > +
> >> > +#define ETNAVIV_PARAM_GPU_MODEL                     0x01
> >> > +#define ETNAVIV_PARAM_GPU_REVISION                  0x02
> >> > +#define ETNAVIV_PARAM_GPU_FEATURES_0                0x03
> >> > +#define ETNAVIV_PARAM_GPU_FEATURES_1                0x04
> >> > +#define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
> >> > +#define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
> >> > +#define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
> >> > +
> >> > +#define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
> >> > +#define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
> >> > +#define ETNAVIV_PARAM_GPU_THREAD_COUNT              0x12
> >> > +#define ETNAVIV_PARAM_GPU_VERTEX_CACHE_SIZE         0x13
> >> > +#define ETNAVIV_PARAM_GPU_SHADER_CORE_COUNT         0x14
> >> > +#define ETNAVIV_PARAM_GPU_PIXEL_PIPES               0x15
> >> > +#define ETNAVIV_PARAM_GPU_VERTEX_OUTPUT_BUFFER_SIZE 0x16
> >> > +#define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
> >> > +#define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
> >> > +#define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
> >> > +
> >> > +//#define MSM_PARAM_GMEM_SIZE  0x02
> >> > +
> >> > +struct drm_etnaviv_param {
> >> > +       uint32_t pipe;           /* in, ETNA_PIPE_x */
> >> > +       uint32_t param;          /* in, ETNAVIV_PARAM_x */
> >> > +       uint64_t value;          /* out (get_param) or in (set_param) */
> >> > +};
> >> > +
> >> > +/*
> >> > + * GEM buffers:
> >> > + */
> >> > +
> >> > +#define ETNA_BO_CMDSTREAM    0x00000001
> >> > +#define ETNA_BO_CACHE_MASK   0x000f0000
> >> > +/* cache modes */
> >> > +#define ETNA_BO_CACHED       0x00010000
> >> > +#define ETNA_BO_WC           0x00020000
> >> > +#define ETNA_BO_UNCACHED     0x00040000
> >> > +
> >> > +struct drm_etnaviv_gem_new {
> >> > +       uint64_t size;           /* in */
> >> > +       uint32_t flags;          /* in, mask of ETNA_BO_x */
> >> > +       uint32_t handle;         /* out */
> >> > +};
> >> > +
> >> > +struct drm_etnaviv_gem_info {
> >> > +       uint32_t handle;         /* in */
> >> > +       uint32_t pad;
> >> > +       uint64_t offset;         /* out, offset to pass to mmap() */
> >> > +};
> >> > +
> >> > +#define ETNA_PREP_READ        0x01
> >> > +#define ETNA_PREP_WRITE       0x02
> >> > +#define ETNA_PREP_NOSYNC      0x04
> >> > +
> >> > +struct drm_etnaviv_gem_cpu_prep {
> >> > +       uint32_t handle;         /* in */
> >> > +       uint32_t op;             /* in, mask of ETNA_PREP_x */
> >> > +       struct drm_etnaviv_timespec timeout;   /* in */
> >> > +};
> >> > +
> >> > +struct drm_etnaviv_gem_cpu_fini {
> >> > +       uint32_t handle;         /* in */
> >> > +};
> >> > +
> >> > +/*
> >> > + * Cmdstream Submission:
> >> > + */
> >> > +
> >> > +/* The value written into the cmdstream is logically:
> >> > + *
> >> > + *   ((relocbuf->gpuaddr + reloc_offset) << shift) | or
> >> > + *
> >> > + * When we have GPU's w/ >32bit ptrs, it should be possible to deal
> >> > + * with this by emit'ing two reloc entries with appropriate shift
> >> > + * values.  Or a new ETNA_SUBMIT_CMD_x type would also be an option.
> >> > + *
> >> > + * NOTE that reloc's must be sorted by order of increasing submit_offset,
> >> > + * otherwise EINVAL.
> >> > + */
> >> > +struct drm_etnaviv_gem_submit_reloc {
> >> > +       uint32_t submit_offset;  /* in, offset from submit_bo */
> >> > +       uint32_t or;             /* in, value OR'd with result */
> >> > +       int32_t  shift;          /* in, amount of left shift (can be negative) */
> >> > +       uint32_t reloc_idx;      /* in, index of reloc_bo buffer */
> >> > +       uint64_t reloc_offset;   /* in, offset from start of reloc_bo */
> >> > +};
> >> > +
> >> > +/* submit-types:
> >> > + *   BUF - this cmd buffer is executed normally.
> >> > + *   IB_TARGET_BUF - this cmd buffer is an IB target.  Reloc's are
> >> > + *      processed normally, but the kernel does not setup an IB to
> >> > + *      this buffer in the first-level ringbuffer
> >> > + *   CTX_RESTORE_BUF - only executed if there has been a GPU context
> >> > + *      switch since the last SUBMIT ioctl
> >> > + */
> >> > +#define ETNA_SUBMIT_CMD_BUF             0x0001
> >> > +#define ETNA_SUBMIT_CMD_IB_TARGET_BUF   0x0002
> >> > +#define ETNA_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 is a
> >> > +struct drm_etnaviv_gem_submit_cmd {
> >> > +       uint32_t type;           /* in, one of ETNA_SUBMIT_CMD_x */
> >>
> >> Do we need different types? I did not use this in my kernel tree.
> >>
> >
> > Please also not the commit cleaning this API, which changes this a bit.
> >
> 
> Ah yes.. I see it.
> 
> > But yes we need different types. At least the context restore buffer
> > type is needed to properly implement GPU power management and context
> > switching.
> >
> 
> What role does GPU power management plays here? For the context switching
> it could make sense. But for the 2d core the context is so small that
> it does not
> hurt to send it with every command stream. For the 3d core it is much
> bigger, but
> this could be done completely in the kernel. Or I am wrong here?
> 
If you power down the GPU you loose the context. You are right that we
could save/restore the context from kernel space, but that is really
taking a toll on CPU time. It is much better to have userspace provide a
context buffer to get the GPU in the expected state, as you then only
need to splice this into the execution stream to restore the context
instead of pushing it with the CPU. Reading back the context on every
switch will kill any performance.

> >> > +       uint32_t submit_idx;     /* in, index of submit_bo cmdstream buffer */
> >> > +       uint32_t submit_offset;  /* in, offset into submit_bo */
> >>
> >> Do we really want/need the offset? I have removed it form cause it
> >> makes things in userspace
> >> more complex then needed.
> >>
> > It makes things a bit more complex, but it allows for far more efficient
> > buffer use if you have are dealing with a lot of flushes. I don't see
> > why we should prevent userspace from using this optimization.
> >
> 
> I tend to get things up and running and do the optimization step if it
> is really worth.
> Also I like stuff to be stupid simple. There is an other interesting
> fact: flushing the
> iommuv2 is done via command stream and we need to reserve more space for the
> tail of the used bo. So if we reserve some space in the command buffer, we have
> other space limits for the tail depending on used hardware.
> 
You may be aware that once this is upstream there is no easy way to
change the userspace interface anymore. So whatever is left out now is
likely to be very hard to reintroduce later.

What' the problem with having a command buffer in the kernel to flush
the MMUv2? Why do you need to insert those commands into the userspace
command stream?

> >> > +       uint32_t size;           /* in, cmdstream size */
> >> > +       uint32_t pad;
> >> > +       uint32_t nr_relocs;      /* in, number of submit_reloc's */
> >> > +       uint64_t __user relocs;  /* in, ptr to array of submit_reloc's */
> >> > +};
> >> > +
> >> > +/* Each buffer referenced elsewhere in the cmdstream submit (ie. the
> >> > + * cmdstream buffer(s) themselves or reloc entries) has one (and only
> >> > + * one) entry in the submit->bos[] table.
> >> > + *
> >> > + * As a optimization, the current buffer (gpu virtual address) can be
> >> > + * passed back through the 'presumed' field.  If on a subsequent reloc,
> >> > + * userspace passes back a 'presumed' address that is still valid,
> >> > + * then patching the cmdstream for this entry is skipped.  This can
> >> > + * avoid kernel needing to map/access the cmdstream bo in the common
> >> > + * case.
> >> > + */
> >> > +#define ETNA_SUBMIT_BO_READ             0x0001
> >> > +#define ETNA_SUBMIT_BO_WRITE            0x0002
> >> > +struct drm_etnaviv_gem_submit_bo {
> >> > +       uint32_t flags;          /* in, mask of ETNA_SUBMIT_BO_x */
> >> > +       uint32_t handle;         /* in, GEM handle */
> >> > +       uint64_t presumed;       /* in/out, presumed buffer address */
> >>
> >> presumed support should never hit etnaviv driver.
> >>
> > As stated in the cover letter I think presumed support will become
> > possible with MMUv2 and may provide a good optimization there. So I
> > would rather leave this in here and just ignore it for now.
> >
> 
> Your statement is funny as you have the following patch in your series:
> [PATCH RFC 070/111] staging: etnaviv: remove presumption of BO addresses
> 
You may notice the difference between interface and implementation.

> I have taken the idea of presumption from *drumroll* freedreno as the
> etnaviv driver
> started as 1:1 copy of freedreno. But what should I say, it never
> should be there and
> even freedreno does not make use of it (checked it about 2-3 months ago).
> Should the user space really know anything about physical addresses at
> all? Would be
> nice to hear the opinion of a drm guru here and maybe Russell.
> 

A presumed address can not be a physical address, but is an address in
the VM context of that process. Nouveau uses the same thing on NV50+
where you have a proper MMU to protect all GPU accesses. I would expect
the same thing to be true for Vivante MMUv2.

> >> > +};
> >> > +
> >> > +/* Each cmdstream submit consists of a table of buffers involved, and
> >> > + * one or more cmdstream buffers.  This allows for conditional execution
> >> > + * (context-restore), and IB buffers needed for per tile/bin draw cmds.
> >> > + */
> >> > +struct drm_etnaviv_gem_submit {
> >> > +       uint32_t pipe;           /* in, ETNA_PIPE_x */
> >> > +       uint32_t fence;          /* out */
> >> > +       uint32_t nr_bos;         /* in, number of submit_bo's */
> >> > +       uint32_t nr_cmds;        /* in, number of submit_cmd's */
> >>
> >> Do we really need to support mutliple cmds per submit? I have removed this
> >> from my kernel.
> >>
> > We need to support at least one additional context buffer, so I don't
> > see why we shouldn't support n buffers.
> >
> 
> Keep it stupid simple. In my libdrm repo, which you hopefully know, I
> have implemented
> the buffer handling from the original libetnaviv. We allocate 5
> command buffers of a defined
> size and rotate through them. During command buffer building we reserve space in
> the stream. if there is not enough space we flush the current buffer
> stream and switch to
> the next and us it. Then there is a way to explicit flush a command buffer.
> 
> For more details see:
> https://github.com/laanwj/etna_viv/tree/master/src/etnaviv
> https://github.com/austriancoder/libdrm
> 

Same argument as above really. We need at least the context buffer.

Regards,
Lucas

-- 
Pengutronix e.K.             | Lucas Stach                 |
Industrial Linux Solutions   | http://www.pengutronix.de/  |