[Mesa-dev] [PATCH 10/10] gallium/u_threaded: don't run out of memory with staging uploads

Nicolai Hähnle nhaehnle at gmail.com
Mon Jan 22 13:07:54 UTC 2018


On 10.01.2018 20:49, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> Cc: 17.2 17.3 <mesa-stable at lists.freedesktop.org>
> ---
>   src/gallium/auxiliary/util/u_threaded_context.c | 13 +++++++++++++
>   src/gallium/auxiliary/util/u_threaded_context.h |  8 ++++++++
>   2 files changed, 21 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
> index ffa8247..7bd13cf 100644
> --- a/src/gallium/auxiliary/util/u_threaded_context.c
> +++ b/src/gallium/auxiliary/util/u_threaded_context.c
> @@ -1508,35 +1508,47 @@ struct tc_resource_copy_region {
>   };
>   
>   static void
>   tc_resource_copy_region(struct pipe_context *_pipe,
>                           struct pipe_resource *dst, unsigned dst_level,
>                           unsigned dstx, unsigned dsty, unsigned dstz,
>                           struct pipe_resource *src, unsigned src_level,
>                           const struct pipe_box *src_box);
>   
>   static void
> +tc_notify_staging_upload_done(struct threaded_context *tc, unsigned size)
> +{
> +   tc->unflushed_transfer_size += size;
> +
> +   if (tc->unflushed_transfer_size > TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE) {
> +      tc->base.flush(&tc->base, NULL, PIPE_FLUSH_ASYNC);
> +      tc->unflushed_transfer_size = 0;
> +   }
> +}
> +
> +static void
>   tc_buffer_do_flush_region(struct threaded_context *tc,
>                             struct threaded_transfer *ttrans,
>                             const struct pipe_box *box)
>   {
>      struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
>   
>      if (ttrans->staging) {
>         struct pipe_box src_box;
>   
>         u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
>                  box->width, &src_box);
>   
>         /* Copy the staging buffer into the original one. */
>         tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
>                                 ttrans->staging, 0, &src_box);
> +      tc_notify_staging_upload_done(tc, box->width);
>      }
>   
>      util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);
>   }
>   
>   static void
>   tc_transfer_flush_region(struct pipe_context *_pipe,
>                            struct pipe_transfer *transfer,
>                            const struct pipe_box *rel_box)
>   {
> @@ -1653,20 +1665,21 @@ tc_buffer_subdata(struct pipe_context *_pipe,
>   
>      /* The upload is small. Enqueue it. */
>      struct tc_buffer_subdata *p =
>         tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
>   
>      tc_set_resource_reference(&p->resource, resource);
>      p->usage = usage;
>      p->offset = offset;
>      p->size = size;
>      memcpy(p->slot, data, size);
> +   tc_notify_staging_upload_done(tc, size);
>   }
>   
>   struct tc_texture_subdata {
>      struct pipe_resource *resource;
>      unsigned level, usage, stride, layer_stride;
>      struct pipe_box box;
>      char slot[0]; /* more will be allocated if needed */
>   };
>   
>   static void
> diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
> index 53c5a7e..295464a 100644
> --- a/src/gallium/auxiliary/util/u_threaded_context.h
> +++ b/src/gallium/auxiliary/util/u_threaded_context.h
> @@ -225,20 +225,27 @@ struct tc_unflushed_batch_token;
>   /* Threshold for when to use the queue or sync. */
>   #define TC_MAX_STRING_MARKER_BYTES  512
>   
>   /* Threshold for when to enqueue buffer/texture_subdata as-is.
>    * If the upload size is greater than this, it will do instead:
>    * - for buffers: DISCARD_RANGE is done by the threaded context
>    * - for textures: sync and call the driver directly
>    */
>   #define TC_MAX_SUBDATA_BYTES        320
>   
> +/* Every staging upload allocates memory. If we have too many uploads
> + * in a row without flushes, we might run out of memory. This limit controls
> + * how many bytes of queued uploads we can have at a time. If we go over,
> + * the threaded context triggers a context flush.
> + */
> +#define TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE (512 * 1024 * 1024)

This seems very aggressive. In reality, this should probably scale with 
free space in GART, but we don't know that here. Can you reduce it to 
something like 64MB? Unless there's concrete evidence that having it 
higher is beneficial, of course.

With that, patches 9 & 10:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


> +
>   typedef void (*tc_replace_buffer_storage_func)(struct pipe_context *ctx,
>                                                  struct pipe_resource *dst,
>                                                  struct pipe_resource *src);
>   typedef struct pipe_fence_handle *(*tc_create_fence_func)(struct pipe_context *ctx,
>                                                             struct tc_unflushed_batch_token *token);
>   
>   struct threaded_resource {
>      struct pipe_resource b;
>      const struct u_resource_vtbl *vtbl;
>   
> @@ -346,20 +353,21 @@ struct tc_batch {
>      struct tc_call call[TC_CALLS_PER_BATCH];
>   };
>   
>   struct threaded_context {
>      struct pipe_context base;
>      struct pipe_context *pipe;
>      struct slab_child_pool pool_transfers;
>      tc_replace_buffer_storage_func replace_buffer_storage;
>      tc_create_fence_func create_fence;
>      unsigned map_buffer_alignment;
> +   unsigned unflushed_transfer_size;
>   
>      struct list_head unflushed_queries;
>   
>      /* Counters for the HUD. */
>      unsigned num_offloaded_slots;
>      unsigned num_direct_slots;
>      unsigned num_syncs;
>   
>      struct util_queue queue;
>      struct util_queue_fence *fence;
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list