[Mesa-dev] [PATCH v2 28/37] panfrost: Start tracking inter-batch dependencies

Mon Sep 16 20:15:14 UTC 2019

> + * A BO is either being written or read at any time, that's what the type field
> + * encodes.

Might this be inferred from (writer != NULL) and (readers->length > 0)?

> +        util_dynarray_foreach(&batch->dependencies,
> +                              struct panfrost_batch_fence *, dep)
> +                panfrost_batch_fence_unreference(*dep);
> +

Nit: Wrap in { braces } for 2+ lines for clarity

> +static void
> +panfrost_batch_add_dep(struct panfrost_batch *batch,
> +                       struct panfrost_batch_fence *newdep)
> +{
> +        if (batch == newdep->batch)
> +                return;
> +
> +        util_dynarray_foreach(&batch->dependencies,
> +                              struct panfrost_batch_fence *, dep) {
> +                if (*dep == newdep)
> +                        return;
> +        }
> +
> +        /* Make sure the dependency graph is acyclic. */
> +        assert(!panfrost_dep_graph_contains_batch(newdep->batch, batch));
> +
> +        panfrost_batch_fence_reference(newdep);
> +        util_dynarray_append(&batch->dependencies,
> +                             struct panfrost_batch_fence *, newdep);
> +
> +        /* We now have a batch depending on us, let's make sure new draw/clear
> +         * calls targeting the same FBO use a new batch object.
> +         */
> +        if (newdep->batch)
> +                panfrost_freeze_batch(newdep->batch);
> +}

I'm wondering if batch->dependencies should be expressed as a set,
rather than a dynarray, such that testing whether a batch has a
given dependency is ideally O(1), not O(N).

In practice I don't know if the asymptotic complexity matters, esp. for
small numbers of batches, and in practice hash table iteration is slow
enough in mesa* that maybe it would be counterproductive.

Just something I thought I might throw out there.

* Set iteration ought to be no slower than array iteration, but constant
  factors are a thing.

> +        int ret = drmSyncobjWait(pan_screen(fence->ctx->base.screen)->fd,
> +                                 &fence->syncobj, 1, 0, 0, NULL);
> +
> +        fence->signaled = ret >= 0;
> +        return fence->signaled;
> +}

Nit: Add a "/* Cache whether the fence was signaled */" comment?

> +static void
> +panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
> +                             struct panfrost_bo_access *access,
> +			     const struct panfrost_bo *bo)

Could you remind me what gc abbreviates? Sorry.

I'm a little unclear on what the function's purpose is based on the
name.

> +{
> +        if (access->writer && panfrost_batch_fence_is_signaled(access->writer)) {
> +                panfrost_batch_fence_unreference(access->writer);
> +                access->writer = NULL;
> +	}

Spacing.

> +
> +        unsigned nreaders = 0;
> +        util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
> +                              reader) {
> +                if (!*reader)
> +                        continue;

Please add parens (!(*reader)) for clarity.

> +static void
> +panfrost_gc_fences(struct panfrost_context *ctx)
> +{
> +        hash_table_foreach(ctx->accessed_bos, entry) {
> +                struct panfrost_bo_access *access = entry->data;
> +
> +                assert(access);
> +                panfrost_bo_access_gc_fences(ctx, access, entry->key);
> +                if (!util_dynarray_num_elements(&access->readers,
> +                                                struct panfrost_batch_fence *) &&
> +                    !access->writer)
> +                        _mesa_hash_table_remove(ctx->accessed_bos, entry);
> +        }
> +}

Question: is it safe to remove entries while iterating the table?
(Not a review comment, I don't know the details of mesa's
implementation)

Also not clear what panfrost_gc_fences is for.

> +static void
> +panfrost_batch_update_bo_access(struct panfrost_batch *batch,
> +                                struct panfrost_bo *bo, uint32_t access_type)
> +{
> +        struct panfrost_context *ctx = batch->ctx;
> +        struct panfrost_bo_access *access;
> +        uint32_t old_access_type;
> +        struct hash_entry *entry;
> +
> +        assert(access_type == PAN_BO_GPU_ACCESS_WRITE ||
> +               access_type == PAN_BO_GPU_ACCESS_READ);
> +
> +        entry = _mesa_hash_table_search(ctx->accessed_bos, bo);
> +        access = entry ? entry->data : NULL;
> +        if (access) {
> +                old_access_type = access->type;
> +        } else {
> +                access = rzalloc(ctx, struct panfrost_bo_access);
> +                util_dynarray_init(&access->readers, access);
> +                _mesa_hash_table_insert(ctx->accessed_bos, bo, access);
> +                /* We are the first to access this BO, let's initialize
> +                 * old_access_type to our own access type in that case.
> +                 */
> +                old_access_type = access_type;
> +                access->type = access_type;
> +        }
> +
> +        assert(access);
> +
> +        if (access_type == PAN_BO_GPU_ACCESS_WRITE &&
> +            old_access_type == PAN_BO_GPU_ACCESS_READ) {
> +                /* Previous access was a read and we want to write this BO.
> +                 * We first need to add explicit deps between our batch and
> +                 * the previous readers.
> +                 */
> +                util_dynarray_foreach(&access->readers,
> +                                      struct panfrost_batch_fence *, reader) {
> +                        /* We were already reading the BO, no need to add a dep
> +                         * on ourself (the acyclic check would complain about
> +                         * that).
> +                         */
> +                        if (!*reader || (*reader)->batch == batch)

!(*reader)

> +                                continue;
> +
> +                        panfrost_batch_add_dep(batch, *reader);
> +                }
> +                panfrost_batch_fence_reference(batch->out_sync);

Nit: Add a blank line between
> +        } else if (access_type == PAN_BO_GPU_ACCESS_READ &&
> +                   old_access_type == PAN_BO_GPU_ACCESS_WRITE) {
> +                /* Previous access was a write and we want to read this BO.
> +                 * First check if we were the previous writer, in that case
> +                 * we want to keep the access type unchanged, as a write is
> +                 * more constraining than a read.
> +                 */
> +                if (access->writer != batch->out_sync) {
> +                        /* Add a dependency on the previous writer. */
> +                        panfrost_batch_add_dep(batch, access->writer);
> +
> +                        /* The previous access was a write, there's no reason
> +                         * to have entries in the readers array.
> +                         */
> +                        assert(!util_dynarray_num_elements(&access->readers,
> +                                                           struct panfrost_batch_fence *));
> +
> +                        /* Add ourselves to the readers array. */
> +                        panfrost_batch_fence_reference(batch->out_sync);
> +                        util_dynarray_append(&access->readers,
> +                                             struct panfrost_batch_fence *,
> +                                             batch->out_sync);
> +                        access->type = access_type;

I would prefer an explicit `access->type = PAN_BO_GPU_ACCESS_READ`, so
the parallelism with `readers` is visually clear without needing to
glance back up at the condition.

> +                }
> +        } else {
> +                /* Previous access was a read and we want to read this BO.
> +                 * Add ourselves to the readers array if we're not already
> +                 * there and add a dependency on the previous writer (if any).
> +                 */
> +	        bool already_there = false;

Spacing.

> +                util_dynarray_foreach(&access->readers,
> +                                      struct panfrost_batch_fence *, reader) {
> +                        if (*reader && (*reader)->batch == batch) {
> +                                already_there = true;
> +                                break;
> +                        }
> +                }

This raises the point that access->readers should probably also be a
set, not an array, but again, not sure how much it matters.

> -        if (ctx->last_out_sync) {
> +        if (reqs & PANFROST_JD_REQ_FS && batch->first_job.gpu)

Could we add a `bool is_fragment` with this condition and then make this
as well as the if below simple if (is_fragment)?

> +        /* Submit the dependencies first. */
> +        util_dynarray_foreach(&batch->dependencies,
> +                              struct panfrost_batch_fence *, dep) {
> +                if ((*dep)->batch)
> +                        panfrost_batch_submit((*dep)->batch);
> +        }

I assume this logic will be refined later in the series...?