[Mesa-dev] [PATCH v3 13/17] panfrost: Make sure the BO is 'ready' when picked from the cache
Alyssa Rosenzweig
alyssa at rosenzweig.io
Fri Sep 20 21:06:05 UTC 2019
Very nice! I'm quite happy with this version, all considered, so R-b!
On Wed, Sep 18, 2019 at 03:24:35PM +0200, Boris Brezillon wrote:
> This is needed if we want to free the panfrost_batch object at submit
> time in order to not have to GC the batch on the next job submission.
>
> Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
> ---
> Changes in v3:
> * Move the patch later in the series and squash "panfrost: Cache GPU
> accesses to BOs" in it
> * Add extra comments to explain what we're doing
> ---
> src/gallium/drivers/panfrost/pan_bo.c | 112 ++++++++++++++++++++-----
> src/gallium/drivers/panfrost/pan_bo.h | 9 ++
> src/gallium/drivers/panfrost/pan_job.c | 11 +++
> 3 files changed, 109 insertions(+), 23 deletions(-)
>
> diff --git a/src/gallium/drivers/panfrost/pan_bo.c b/src/gallium/drivers/panfrost/pan_bo.c
> index 9daddf9d0cc2..37602688d630 100644
> --- a/src/gallium/drivers/panfrost/pan_bo.c
> +++ b/src/gallium/drivers/panfrost/pan_bo.c
> @@ -23,6 +23,7 @@
> * Authors (Collabora):
> * Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
> */
> +#include <errno.h>
> #include <stdio.h>
> #include <fcntl.h>
> #include <xf86drm.h>
> @@ -101,6 +102,63 @@ panfrost_bo_free(struct panfrost_bo *bo)
> ralloc_free(bo);
> }
>
> +/* Returns true if the BO is ready, false otherwise.
> + * access_type encodes the type of access one wants to ensure is done.
> + * If you want to make sure all writers are done writing, pass
> + * PAN_BO_ACCESS_WRITE.
> + * If you want to wait for all users, pass PAN_BO_ACCESS_RW.
> + * PAN_BO_ACCESS_READ would work too, since waiting for readers implies
> + * waiting for writers as well, but we want to make things explicit, and
> + * waiting only for readers is impossible anyway.
> + */
> +bool
> +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
> + uint32_t access_type)
> +{
> + struct drm_panfrost_wait_bo req = {
> + .handle = bo->gem_handle,
> + .timeout_ns = timeout_ns,
> + };
> + int ret;
> +
> + assert(access_type == PAN_BO_ACCESS_WRITE ||
> + access_type == PAN_BO_ACCESS_RW);
> +
> + /* If the BO has been exported or imported, we can't rely on the cached
> + * state; we need to call the WAIT_BO ioctl.
> + */
> + if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
> + /* If ->gpu_access is 0, the BO is idle, no need to wait. */
> + if (!bo->gpu_access)
> + return true;
> +
> + /* If the caller only wants to wait for writers and no
> + * writes are pending, we don't have to wait.
> + */
> + if (access_type == PAN_BO_ACCESS_WRITE &&
> + !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
> + return true;
> + }
> +
> + /* The ioctl returns a value >= 0 when the BO we are waiting for is
> + * ready, and -1 otherwise.
> + */
> + ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
> + if (ret != -1) {
> + /* Set gpu_access to 0 so that the next call to bo_wait()
> + * doesn't have to call the WAIT_BO ioctl.
> + */
> + bo->gpu_access = 0;
> + return true;
> + }
> +
> + /* If errno is not ETIMEDOUT or EBUSY, the handle we passed is
> + * invalid, which shouldn't happen here.
> + */
> + assert(errno == ETIMEDOUT || errno == EBUSY);
> + return false;
> +}
> +
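For anyone following along, the intended call patterns, as I read the comment
above, look roughly like the hypothetical snippets below (illustrations only,
not part of the patch; the error handling is made up):

    /* CPU is only going to read the BO: pending GPU writes are what
     * matter, so waiting on writers is enough. */
    if (!panfrost_bo_wait(bo, INT64_MAX, PAN_BO_ACCESS_WRITE))
            fprintf(stderr, "timed out waiting for writers\n");

    /* CPU is going to write the BO, or we want to recycle it from the
     * cache: every pending GPU user matters, so wait on readers and
     * writers alike. */
    if (!panfrost_bo_wait(bo, INT64_MAX, PAN_BO_ACCESS_RW))
            fprintf(stderr, "timed out waiting for all users\n");
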
> /* Helper to calculate the bucket index of a BO */
>
> static unsigned
> @@ -137,9 +195,8 @@ pan_bucket(struct panfrost_screen *screen, unsigned size)
> * BO. */
>
> static struct panfrost_bo *
> -panfrost_bo_cache_fetch(
> - struct panfrost_screen *screen,
> - size_t size, uint32_t flags)
> +panfrost_bo_cache_fetch(struct panfrost_screen *screen,
> + size_t size, uint32_t flags, bool dontwait)
> {
> pthread_mutex_lock(&screen->bo_cache_lock);
> struct list_head *bucket = pan_bucket(screen, size);
> @@ -147,27 +204,30 @@ panfrost_bo_cache_fetch(
>
> /* Iterate the bucket looking for something suitable */
> list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
> - if (entry->size >= size &&
> - entry->flags == flags) {
> - int ret;
> - struct drm_panfrost_madvise madv;
> + if (entry->size < size || entry->flags != flags)
> + continue;
>
> - /* This one works, splice it out of the cache */
> - list_del(&entry->link);
> + if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
> + PAN_BO_ACCESS_RW))
> + continue;
>
> - madv.handle = entry->gem_handle;
> - madv.madv = PANFROST_MADV_WILLNEED;
> - madv.retained = 0;
> + struct drm_panfrost_madvise madv = {
> + .handle = entry->gem_handle,
> + .madv = PANFROST_MADV_WILLNEED,
> + };
> + int ret;
>
> - ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
> - if (!ret && !madv.retained) {
> - panfrost_bo_free(entry);
> - continue;
> - }
> - /* Let's go! */
> - bo = entry;
> - break;
> + /* This one works, splice it out of the cache */
> + list_del(&entry->link);
> +
> + ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
> + if (!ret && !madv.retained) {
> + panfrost_bo_free(entry);
> + continue;
> }
> + /* Let's go! */
> + bo = entry;
> + break;
> }
> pthread_mutex_unlock(&screen->bo_cache_lock);
>
> @@ -281,12 +341,18 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t size,
> if (flags & PAN_BO_GROWABLE)
> assert(flags & PAN_BO_INVISIBLE);
>
> - /* Before creating a BO, we first want to check the cache, otherwise,
> - * the cache misses and we need to allocate a BO fresh from the kernel
> + /* Before creating a BO, we first want to check the cache but without
> + * waiting for BO readiness (BOs in the cache can still be referenced
> + * by jobs that are not finished yet).
> + * If the cache lookup fails, we fall back on a fresh BO allocation,
> + * and if that fails too, we try one more time to allocate from the
> + * cache, this time waiting for a BO to become ready if needed.
> */
> - bo = panfrost_bo_cache_fetch(screen, size, flags);
> + bo = panfrost_bo_cache_fetch(screen, size, flags, true);
> if (!bo)
> bo = panfrost_bo_alloc(screen, size, flags);
> + if (!bo)
> + bo = panfrost_bo_cache_fetch(screen, size, flags, false);
>
> if (!bo)
> fprintf(stderr, "BO creation failed\n");
> diff --git a/src/gallium/drivers/panfrost/pan_bo.h b/src/gallium/drivers/panfrost/pan_bo.h
> index e4743f820aeb..022a56d9ca34 100644
> --- a/src/gallium/drivers/panfrost/pan_bo.h
> +++ b/src/gallium/drivers/panfrost/pan_bo.h
> @@ -100,6 +100,12 @@ struct panfrost_bo {
> int gem_handle;
>
> uint32_t flags;
> +
> + /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
> + * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
> + * when the BO is idle.
> + */
> + uint32_t gpu_access;
> };
>
> static inline uint32_t
> @@ -113,6 +119,9 @@ panfrost_bo_access_for_stage(enum pipe_shader_type stage)
> PAN_BO_ACCESS_VERTEX_TILER;
> }
>
> +bool
> +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
> + uint32_t access_type);
> void
> panfrost_bo_reference(struct panfrost_bo *bo);
> void
> diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
> index 235cb21dc8c8..a56f4044fda0 100644
> --- a/src/gallium/drivers/panfrost/pan_job.c
> +++ b/src/gallium/drivers/panfrost/pan_job.c
> @@ -810,8 +810,19 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
>
> hash_table_foreach(batch->bos, entry) {
> struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
> + uint32_t flags = (uintptr_t)entry->data;
> +
> assert(bo->gem_handle > 0);
> bo_handles[submit.bo_handle_count++] = bo->gem_handle;
> +
> + /* Update the BO access flags so that panfrost_bo_wait() knows
> + * about all pending accesses.
> + * We only keep the READ/WRITE info since this is all the BO
> + * wait logic cares about.
> + * We also preserve existing flags as this batch might not
> + * be the first one to access the BO.
> + */
> + bo->gpu_access |= flags & (PAN_BO_ACCESS_RW);
> }
>
> submit.bo_handles = (u64) (uintptr_t) bo_handles;
> --
> 2.21.0
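
For completeness, this is how the two halves fit together as I read them: the
submit path ORs the batch's READ/WRITE flags into bo->gpu_access, and
panfrost_bo_wait() uses that cached state to skip the WAIT_BO ioctl whenever it
already proves the BO is ready. A tiny standalone model of that logic (toy
names and types, not driver code) follows:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-ins for the PAN_BO_ACCESS_* flags used by the patch. */
    #define TOY_ACCESS_READ  (1u << 0)
    #define TOY_ACCESS_WRITE (1u << 1)
    #define TOY_ACCESS_RW    (TOY_ACCESS_READ | TOY_ACCESS_WRITE)

    struct toy_bo {
            uint32_t gpu_access; /* pending GPU accesses, as in struct panfrost_bo */
    };

    /* Submit side (pan_job.c hunk): accumulate pending accesses on the BO. */
    static void toy_submit(struct toy_bo *bo, uint32_t access)
    {
            bo->gpu_access |= access & TOY_ACCESS_RW;
    }

    /* Wait side (pan_bo.c hunk): succeed without "calling the ioctl" whenever
     * the cached state already proves the BO is ready for this access type. */
    static bool toy_wait(struct toy_bo *bo, uint32_t access_type, bool *used_ioctl)
    {
            *used_ioctl = false;
            if (!bo->gpu_access)
                    return true;
            if (access_type == TOY_ACCESS_WRITE &&
                !(bo->gpu_access & TOY_ACCESS_WRITE))
                    return true;
            /* The real code calls DRM_IOCTL_PANFROST_WAIT_BO here and clears
             * gpu_access on success. */
            *used_ioctl = true;
            bo->gpu_access = 0;
            return true;
    }

    int main(void)
    {
            struct toy_bo bo = { 0 };
            bool ioctl_used;

            toy_submit(&bo, TOY_ACCESS_READ);             /* a batch only reads the BO */
            toy_wait(&bo, TOY_ACCESS_WRITE, &ioctl_used); /* no writers pending... */
            printf("ioctl needed: %d\n", ioctl_used);     /* ...prints 0 */

            toy_wait(&bo, TOY_ACCESS_RW, &ioctl_used);    /* but a reader is pending */
            printf("ioctl needed: %d\n", ioctl_used);     /* prints 1 */
            return 0;
    }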