[Mesa-dev] [PATCH 8/8] panfrost: Add backend targeting the DRM driver

Rob Herring robh at kernel.org
Mon Mar 4 19:50:17 UTC 2019


On Mon, Mar 4, 2019 at 10:12 AM Tomeu Vizoso <tomeu.vizoso at collabora.com> wrote:
>
> This backend interacts with the new DRM driver for Midgard GPUs which is
> currently in development.
>
> When using this backend, Panfrost has roughly the same functionality as
> when using the non-DRM driver from Arm.
>
> Signed-off-by: Tomeu Vizoso <tomeu.vizoso at collabora.com>
> ---
>  include/drm-uapi/panfrost_drm.h        | 128 +++++++++
>  src/gallium/drivers/panfrost/pan_drm.c | 362 +++++++++++++++++++++++++
>  2 files changed, 490 insertions(+)
>  create mode 100644 include/drm-uapi/panfrost_drm.h
>
> diff --git a/include/drm-uapi/panfrost_drm.h b/include/drm-uapi/panfrost_drm.h
> new file mode 100644
> index 000000000000..d4d271e37206
> --- /dev/null
> +++ b/include/drm-uapi/panfrost_drm.h
> @@ -0,0 +1,128 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/* Copyright 2018, Linaro, Ltd., Rob Herring <robh at kernel.org> */
> +
> +#ifndef __PANFROST_DRM_H__
> +#define __PANFROST_DRM_H__
> +
> +#include "drm.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif
> +
> +/* timeouts are specified in clock-monotonic absolute times (to simplify
> + * restarting interrupted ioctls).  The following struct is logically the
> + * same as 'struct timespec' but 32/64b ABI safe.
> + */
> +struct drm_panfrost_timespec {
> +       __s64 tv_sec;          /* seconds */
> +       __s64 tv_nsec;         /* nanoseconds */
> +};
> +
> +#define PANFROST_PARAM_GPU_ID          0x01
> +
> +struct drm_panfrost_get_param {
> +       __u32 param;    /* in */
> +       __u32 pad;
> +       __u64 value;    /* out */
> +};
> +
> +struct drm_panfrost_gem_new {
> +       __u64 size;           /* in */
> +       __u32 flags;          /* in, mask of ETNA_BO_x */
> +       __u32 handle;         /* out */
> +       /**
> +        * Returned offset for the BO in the GPU address space.  This offset
> +        * is private to the DRM fd and is valid for the lifetime of the GEM
> +        * handle.
> +        *
> +        * This offset value will always be nonzero, since various HW
> +        * units treat 0 specially.
> +        */
> +       __u64 offset;
> +};
> +struct drm_panfrost_gem_info {
> +       __u32 handle;         /* in */
> +       __u32 pad;
> +       __u64 offset;         /* out, offset to pass to mmap() */
> +};
> +
> +/**
> + * Returns the offset for the BO in the GPU address space for this DRM fd.
> + * This is the same value returned by drm_panfrost_gem_new, if that was called
> + * from this DRM fd.
> + */
> +struct drm_panfrost_get_bo_offset {
> +       __u32 handle;
> +       __u32 pad;
> +       __u64 offset;
> +};
> +
> +
> +#define ETNA_PREP_READ        0x01
> +#define ETNA_PREP_WRITE       0x02
> +#define ETNA_PREP_NOSYNC      0x04

I guess we need to rename or delete these, along with a few other spots
that still use 'ETNA'. I'd hoped to keep some alignment with other
drivers, but I don't think that's going to happen.
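
Something like the following would do, though the PANFROST_PREP_* names
here are only a suggestion on my part, not finalized uapi:

#define PANFROST_PREP_READ        0x01
#define PANFROST_PREP_WRITE       0x02
#define PANFROST_PREP_NOSYNC      0x04

The "mask of ETNA_PREP_x" comment on drm_panfrost_gem_cpu_prep.op (and the
"mask of ETNA_BO_x" one on drm_panfrost_gem_new.flags) would then need to
change to match whatever names we settle on.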

> +
> +struct drm_panfrost_gem_cpu_prep {
> +       __u32 handle;         /* in */
> +       __u32 op;             /* in, mask of ETNA_PREP_x */
> +       struct drm_panfrost_timespec timeout;   /* in */
> +};
> +
> +struct drm_panfrost_gem_cpu_fini {
> +       __u32 handle;         /* in */
> +       __u32 flags;          /* in, placeholder for now, no defined values */
> +};
> +
> +/*
> + * Cmdstream Submission:
> + */
> +
> +#define PANFROST_JD_REQ_FS (1 << 0)
> +
> +#define PANFROST_DEP_TYPE_ORDER        0x01
> +#define PANFROST_DEP_TYPE_DATA 0x02
> +
> +struct drm_panfrost_gem_submit_atom_dep {
> +       __u32 atom_nr;  /* job ID of dependency */
> +       __u32 type;     /* one of PANFROST_DEP_TYPE_* */
> +};
> +
> +struct drm_panfrost_gem_submit_atom {
> +       __u64 jc;           /* in, address to GPU mapping of job descriptor */
> +       __u32 atom_nr;      /* in, job ID */
> +       __u32 requirements; /* in, a combination of PANFROST_JD_REQ_* */
> +       __u64 bo_handles;
> +       __u32 bo_handle_count;
> +       struct drm_panfrost_gem_submit_atom_dep deps[2];
> +};
> +
> +struct drm_panfrost_gem_submit {
> +       __u32 nr_atoms;         /* in, number of submit_atom */
> +       __u32 pad;
> +       __u64 atoms;            /* in, ptr to array of submit_atom */
> +};
> +
> +
> +
> +#define DRM_PANFROST_GET_PARAM         0x00
> +#define DRM_PANFROST_GEM_NEW           0x01
> +#define DRM_PANFROST_GEM_INFO          0x02
> +#define DRM_PANFROST_GEM_CPU_PREP      0x03
> +#define DRM_PANFROST_GEM_CPU_FINI      0x04
> +#define DRM_PANFROST_GEM_SUBMIT                0x05
> +#define DRM_PANFROST_GET_BO_OFFSET     0x06
> +
> +#define DRM_IOCTL_PANFROST_GET_PARAM   DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_PARAM, struct drm_panfrost_get_param)
> +#define DRM_IOCTL_PANFROST_GEM_NEW     DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GEM_NEW, struct drm_panfrost_gem_new)
> +#define DRM_IOCTL_PANFROST_GEM_INFO    DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GEM_INFO, struct drm_panfrost_gem_info)
> +#define DRM_IOCTL_PANFROST_GEM_CPU_PREP        DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_GEM_CPU_PREP, struct drm_panfrost_gem_cpu_prep)
> +#define DRM_IOCTL_PANFROST_GEM_CPU_FINI        DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_GEM_CPU_FINI, struct drm_panfrost_gem_cpu_fini)
> +#define DRM_IOCTL_PANFROST_GEM_SUBMIT  DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_GEM_SUBMIT, struct drm_panfrost_gem_submit)
> +#define DRM_IOCTL_PANFROST_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset)
> +
> +#if defined(__cplusplus)
> +}
> +#endif
> +
> +#endif /* __PANFROST_DRM_H__ */
> diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c
> index cb01cce634f7..1ae775c5f763 100644
> --- a/src/gallium/drivers/panfrost/pan_drm.c
> +++ b/src/gallium/drivers/panfrost/pan_drm.c
> @@ -21,16 +21,364 @@
>   * SOFTWARE.
>   *
>   */
> +
> +#include <fcntl.h>
> +#include <xf86drm.h>
> +
> +#include "drm-uapi/panfrost_drm.h"
> +
>  #include "util/u_memory.h"
> +#include "util/os_time.h"
>
>  #include "pan_screen.h"
> +#include "pan_resource.h"
> +#include "pan_context.h"
>  #include "pan_drm.h"
> +#include "pan_trace.h"
>
>  struct panfrost_drm {
>         struct panfrost_driver base;
>         int fd;
>  };
>
> +static void
> +panfrost_drm_allocate_slab(struct panfrost_screen *screen,
> +                          struct panfrost_memory *mem,
> +                          size_t pages,
> +                          bool same_va,
> +                          int extra_flags,
> +                          int commit_count,
> +                          int extent)
> +{
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +       struct drm_panfrost_gem_new gem_new = {
> +                       .size = pages * 4096,
> +                       .flags = 0,  // TODO figure out proper flags..
> +       };
> +       struct drm_panfrost_gem_info gem_info = {0,};
> +       int ret;
> +
> +       // TODO cache allocations
> +       // TODO properly handle errors
> +       // TODO take into account extra_flags
> +
> +       ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GEM_NEW, &gem_new);
> +       if (ret) {
> +                fprintf(stderr, "DRM_IOCTL_PANFROST_GEM_NEW failed: %d\n", ret);
> +               assert(0);
> +       }
> +
> +       mem->gpu = gem_new.offset;
> +       mem->gem_handle = gem_new.handle;
> +        mem->stack_bottom = 0;
> +        mem->size = gem_new.size;
> +
> +       // TODO map and unmap on demand?
> +       gem_info.handle = gem_new.handle;
> +       ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GEM_INFO, &gem_info);
> +       if (ret) {
> +                fprintf(stderr, "DRM_IOCTL_PANFROST_GEM_INFO failed: %d\n", ret);
> +               assert(0);
> +       }
> +
> +        mem->cpu = mmap(NULL, mem->size, PROT_READ | PROT_WRITE, MAP_SHARED,
> +                       drm->fd, gem_info.offset);
> +        if (mem->cpu == MAP_FAILED) {
> +                fprintf(stderr, "mmap failed: %p\n", mem->cpu);
> +               assert(0);
> +       }
> +
> +        /* Record the mmap if we're tracing */
> +        if (!(extra_flags & PAN_ALLOCATE_GROWABLE))
> +                pantrace_mmap(mem->gpu, mem->cpu, mem->size, NULL);
> +}
> +
> +static void
> +panfrost_drm_free_slab(struct panfrost_screen *screen, struct panfrost_memory *mem)
> +{
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +       struct drm_gem_close gem_close = {
> +               .handle = mem->gem_handle,
> +       };
> +       int ret;
> +
> +        if (munmap((void *) (uintptr_t) mem->cpu, mem->size)) {
> +                perror("munmap");
> +                abort();
> +        }
> +
> +       mem->cpu = NULL;
> +
> +       ret = drmIoctl(drm->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
> +       if (ret) {
> +                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %d\n", ret);
> +               assert(0);
> +       }
> +
> +       mem->gem_handle = -1;
> +}
> +
> +static struct panfrost_bo *
> +panfrost_drm_import_bo(struct panfrost_screen *screen, struct winsys_handle *whandle)
> +{
> +       struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo);
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +        struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
> +        int ret;
> +        unsigned gem_handle;
> +
> +       ret = drmPrimeFDToHandle(drm->fd, whandle->handle, &gem_handle);
> +       assert(!ret);
> +
> +       get_bo_offset.handle = gem_handle;
> +        ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
> +        assert(!ret);
> +
> +       bo->gem_handle = gem_handle;
> +        bo->gpu[0] = (mali_ptr) get_bo_offset.offset;
> +
> +       /* Why would we need to map the dumb buffer now? */
> +        bo->cpu[0] = NULL;
> +
> +        return bo;
> +}
> +
> +static int
> +panfrost_drm_export_bo(struct panfrost_screen *screen, int gem_handle, struct winsys_handle *whandle)
> +{
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +        struct drm_prime_handle args = {
> +                .handle = gem_handle,
> +                .flags = DRM_CLOEXEC,
> +        };
> +
> +        int ret = drmIoctl(drm->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
> +        if (ret == -1)
> +                return FALSE;
> +
> +        whandle->handle = args.fd;
> +
> +        return TRUE;
> +}
> +
> +static void
> +panfrost_drm_free_imported_bo(struct panfrost_screen *screen, struct panfrost_bo *bo)
> +{
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +       struct drm_gem_close gem_close = {
> +               .handle = bo->gem_handle,
> +       };
> +       int ret;
> +
> +       ret = drmIoctl(drm->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
> +       if (ret) {
> +                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %d\n", ret);
> +               assert(0);
> +       }
> +
> +       bo->gem_handle = -1;
> +       bo->gpu[0] = (mali_ptr)NULL;
> +}
> +
> +static uint8_t
> +allocate_atom()
> +{
> +       /* Used to allocate atom numbers for jobs. We probably want to overhaul this in kernel space at some point. */
> +       static uint8_t atom_counter = 0;
> +
> +        atom_counter++;
> +
> +        /* Workaround quirk where atoms must be strictly positive */
> +
> +        if (atom_counter == 0)
> +                atom_counter++;
> +
> +        return atom_counter;
> +}
> +
> +static int
> +panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool is_scanout)
> +{
> +        struct pipe_context *gallium = (struct pipe_context *) ctx;
> +        struct panfrost_screen *screen = pan_screen(gallium->screen);
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +        struct drm_panfrost_gem_submit_atom atoms[2];
> +        struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
> +        int idx = 0;
> +
> +       memset(atoms, 0, sizeof(atoms));
> +
> +        atoms[idx].deps[0].atom_nr = screen->last_fragment_id;
> +        atoms[idx].deps[0].type = PANFROST_DEP_TYPE_ORDER;
> +
> +        if (has_draws) {
> +               atoms[idx].jc = ctx->set_value_job;
> +               atoms[idx].atom_nr = allocate_atom();
> +               idx++;
> +               atoms[idx].deps[0].atom_nr = atoms[idx - 1].atom_nr;
> +               atoms[idx].deps[0].type = PANFROST_DEP_TYPE_DATA;
> +       }
> +
> +       atoms[idx].jc = panfrost_fragment_job(ctx);
> +       atoms[idx].atom_nr = allocate_atom();
> +       atoms[idx].requirements = PANFROST_JD_REQ_FS;
> +       if (surf) {
> +               struct panfrost_resource *res = pan_resource(surf->texture);
> +               atoms[idx].bo_handles = (__u64) &res->bo->gem_handle;
> +               atoms[idx].bo_handle_count = 1;
> +       }
> +
> +        struct drm_panfrost_gem_submit submit = {
> +                .atoms = (__u64)atoms,
> +                .nr_atoms = idx + 1,
> +        };
> +
> +        /* Dump memory _before_ submitting so we're not corrupted with actual GPU results */
> +        pantrace_dump_memory();
> +
> +        if (drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GEM_SUBMIT, &submit)) {
> +                printf("Error submitting\n");
> +                assert(0);
> +        }
> +
> +        /* Trace the job if we're doing that and do a memory dump. We may
> +         * want to adjust this logic once we're ready to trace FBOs */
> +        for (unsigned i = (has_draws ? 0 : 1); i < 2; ++i) {
> +                pantrace_submit_job(atoms[i].jc, atoms[i].requirements, FALSE);
> +        }
> +
> +        /* Return fragment ID */
> +        return atoms[idx].atom_nr;
> +}
> +
> +static struct panfrost_fence *
> +panfrost_fence_create(struct panfrost_context *ctx)
> +{
> +        struct pipe_context *gallium = (struct pipe_context *) ctx;
> +        struct panfrost_screen *screen = pan_screen(gallium->screen);
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +        struct panfrost_fence *f = calloc(1, sizeof(*f));
> +        if (!f)
> +                return NULL;
> +
> +        /* Snapshot the last Panfrost rendering's out fence.  We'd rather have
> +         * another syncobj instead of a sync file, but this is all we get.
> +         * (HandleToFD/FDToHandle just gives you another syncobj ID for the
> +         * same syncobj).
> +         */
> +        drmSyncobjExportSyncFile(drm->fd, ctx->out_sync, &f->fd);
> +        if (f->fd == -1) {
> +                fprintf(stderr, "export failed\n");
> +                free(f);
> +                return NULL;
> +        }
> +
> +        pipe_reference_init(&f->reference, 1);
> +
> +        return f;
> +}
> +
> +static void
> +panfrost_drm_force_flush_fragment(struct panfrost_context *ctx,
> +                                 struct pipe_fence_handle **fence)
> +{
> +        struct pipe_context *gallium = (struct pipe_context *) ctx;
> +
> +        if (fence) {
> +                struct panfrost_fence *f = panfrost_fence_create(ctx);
> +                gallium->screen->fence_reference(gallium->screen, fence, NULL);
> +                *fence = (struct pipe_fence_handle *)f;
> +        }
> +}
> +
> +static void
> +panfrost_drm_enable_counters(struct panfrost_screen *screen)
> +{
> +       fprintf(stderr, "unimplemented: %s\n", __func__);
> +}
> +
> +static void
> +panfrost_drm_dump_counters(struct panfrost_screen *screen)
> +{
> +       fprintf(stderr, "unimplemented: %s\n", __func__);
> +}
> +
> +static unsigned
> +panfrost_drm_query_gpu_version(struct panfrost_screen *screen)
> +{
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +        struct drm_panfrost_get_param get_param = {0,};
> +        int ret;
> +
> +       get_param.param = PANFROST_PARAM_GPU_ID;
> +        ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param);
> +        assert(!ret);
> +
> +       return get_param.value;
> +}
> +
> +static int
> +panfrost_drm_init_context(struct panfrost_context *ctx)
> +{
> +        struct pipe_context *gallium = (struct pipe_context *) ctx;
> +        struct panfrost_screen *screen = pan_screen(gallium->screen);
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +
> +        return drmSyncobjCreate(drm->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
> +                                &ctx->out_sync);
> +}
> +
> +static void
> +panfrost_drm_fence_reference(struct pipe_screen *screen,
> +                         struct pipe_fence_handle **ptr,
> +                         struct pipe_fence_handle *fence)
> +{
> +        struct panfrost_fence **p = (struct panfrost_fence **)ptr;
> +        struct panfrost_fence *f = (struct panfrost_fence *)fence;
> +        struct panfrost_fence *old = *p;
> +
> +        if (pipe_reference(&(*p)->reference, &f->reference)) {
> +                close(old->fd);
> +                free(old);
> +        }
> +        *p = f;
> +}
> +
> +static boolean
> +panfrost_drm_fence_finish(struct pipe_screen *pscreen,
> +                      struct pipe_context *ctx,
> +                      struct pipe_fence_handle *fence,
> +                      uint64_t timeout)
> +{
> +        struct panfrost_screen *screen = pan_screen(pscreen);
> +       struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
> +        struct panfrost_fence *f = (struct panfrost_fence *)fence;
> +        int ret;
> +
> +        unsigned syncobj;
> +        ret = drmSyncobjCreate(drm->fd, 0, &syncobj);
> +        if (ret) {
> +                fprintf(stderr, "Failed to create syncobj to wait on: %m\n");
> +                return false;
> +        }
> +
> +        ret = drmSyncobjImportSyncFile(drm->fd, syncobj, f->fd);
> +        if (ret) {
> +                fprintf(stderr, "Failed to import fence to syncobj: %m\n");
> +                return false;
> +        }
> +
> +        uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
> +        if (abs_timeout == OS_TIMEOUT_INFINITE)
> +                abs_timeout = INT64_MAX;
> +
> +        ret = drmSyncobjWait(drm->fd, &syncobj, 1, abs_timeout, 0, NULL);
> +
> +        drmSyncobjDestroy(drm->fd, syncobj);
> +
> +        return ret >= 0;
> +}
> +
>  struct panfrost_driver *
>  panfrost_create_drm_driver(int fd)
>  {
> @@ -38,5 +386,19 @@ panfrost_create_drm_driver(int fd)
>
>         driver->fd = fd;
>
> +       driver->base.import_bo = panfrost_drm_import_bo;
> +       driver->base.export_bo = panfrost_drm_export_bo;
> +       driver->base.free_imported_bo = panfrost_drm_free_imported_bo;
> +       driver->base.submit_vs_fs_job = panfrost_drm_submit_vs_fs_job;
> +       driver->base.force_flush_fragment = panfrost_drm_force_flush_fragment;
> +       driver->base.allocate_slab = panfrost_drm_allocate_slab;
> +       driver->base.free_slab = panfrost_drm_free_slab;
> +       driver->base.enable_counters = panfrost_drm_enable_counters;
> +       driver->base.query_gpu_version = panfrost_drm_query_gpu_version;
> +       driver->base.init_context = panfrost_drm_init_context;
> +       driver->base.fence_reference = panfrost_drm_fence_reference;
> +       driver->base.fence_finish = panfrost_drm_fence_finish;
> +       driver->base.dump_counters = panfrost_drm_dump_counters;
> +
>          return &driver->base;
>  }
> --
> 2.20.1
>
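
As a side note for anyone reading the new uapi without the rest of the
series: the allocation path in panfrost_drm_allocate_slab() above boils
down to roughly the sequence below. This is just a minimal sketch (error
handling, caching and the pantrace hooks are left out, and the helper name
and signature are made up for illustration), not something from the patch:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "drm-uapi/panfrost_drm.h"

/* Create a BO, learn its GPU address, then map it for CPU access. */
static void *
panfrost_alloc_and_map(int fd, size_t pages, uint64_t *gpu_va, uint32_t *handle)
{
        struct drm_panfrost_gem_new gem_new = { .size = pages * 4096 };
        struct drm_panfrost_gem_info gem_info = { 0 };

        if (drmIoctl(fd, DRM_IOCTL_PANFROST_GEM_NEW, &gem_new))
                return NULL;

        *gpu_va = gem_new.offset;   /* GPU VA, private to this DRM fd */
        *handle = gem_new.handle;

        gem_info.handle = gem_new.handle;
        if (drmIoctl(fd, DRM_IOCTL_PANFROST_GEM_INFO, &gem_info))
                return NULL;

        /* The CPU mapping goes through the fake mmap offset from GEM_INFO. */
        return mmap(NULL, gem_new.size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, gem_info.offset);
}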

