[Mesa-dev] [PATCH 10/10] gallium/u_threaded: don't run out of memory with staging uploads
Marek Olšák
maraeo at gmail.com
Mon Jan 22 22:42:06 UTC 2018
On Mon, Jan 22, 2018 at 2:07 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> On 10.01.2018 20:49, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> Cc: 17.2 17.3 <mesa-stable at lists.freedesktop.org>
>> ---
>> src/gallium/auxiliary/util/u_threaded_context.c | 13 +++++++++++++
>> src/gallium/auxiliary/util/u_threaded_context.h | 8 ++++++++
>> 2 files changed, 21 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/util/u_threaded_context.c
>> b/src/gallium/auxiliary/util/u_threaded_context.c
>> index ffa8247..7bd13cf 100644
>> --- a/src/gallium/auxiliary/util/u_threaded_context.c
>> +++ b/src/gallium/auxiliary/util/u_threaded_context.c
>> @@ -1508,35 +1508,47 @@ struct tc_resource_copy_region {
>> };
>> static void
>> tc_resource_copy_region(struct pipe_context *_pipe,
>> struct pipe_resource *dst, unsigned dst_level,
>> unsigned dstx, unsigned dsty, unsigned dstz,
>> struct pipe_resource *src, unsigned src_level,
>> const struct pipe_box *src_box);
>> static void
>> +tc_notify_staging_upload_done(struct threaded_context *tc, unsigned size)
>> +{
>> + tc->unflushed_transfer_size += size;
>> +
>> + if (tc->unflushed_transfer_size >
>> TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE) {
>> + tc->base.flush(&tc->base, NULL, PIPE_FLUSH_ASYNC);
>> + tc->unflushed_transfer_size = 0;
>> + }
>> +}
>> +
>> +static void
>> tc_buffer_do_flush_region(struct threaded_context *tc,
>> struct threaded_transfer *ttrans,
>> const struct pipe_box *box)
>> {
>> struct threaded_resource *tres =
>> threaded_resource(ttrans->b.resource);
>> if (ttrans->staging) {
>> struct pipe_box src_box;
>> u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
>> box->width, &src_box);
>> /* Copy the staging buffer into the original one. */
>> tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x,
>> 0, 0,
>> ttrans->staging, 0, &src_box);
>> + tc_notify_staging_upload_done(tc, box->width);
>> }
>> util_range_add(tres->base_valid_buffer_range, box->x, box->x +
>> box->width);
>> }
>> static void
>> tc_transfer_flush_region(struct pipe_context *_pipe,
>> struct pipe_transfer *transfer,
>> const struct pipe_box *rel_box)
>> {
>> @@ -1653,20 +1665,21 @@ tc_buffer_subdata(struct pipe_context *_pipe,
>> /* The upload is small. Enqueue it. */
>> struct tc_buffer_subdata *p =
>> tc_add_slot_based_call(tc, TC_CALL_buffer_subdata,
>> tc_buffer_subdata, size);
>> tc_set_resource_reference(&p->resource, resource);
>> p->usage = usage;
>> p->offset = offset;
>> p->size = size;
>> memcpy(p->slot, data, size);
>> + tc_notify_staging_upload_done(tc, size);
>> }
>> struct tc_texture_subdata {
>> struct pipe_resource *resource;
>> unsigned level, usage, stride, layer_stride;
>> struct pipe_box box;
>> char slot[0]; /* more will be allocated if needed */
>> };
>> static void
>> diff --git a/src/gallium/auxiliary/util/u_threaded_context.h
>> b/src/gallium/auxiliary/util/u_threaded_context.h
>> index 53c5a7e..295464a 100644
>> --- a/src/gallium/auxiliary/util/u_threaded_context.h
>> +++ b/src/gallium/auxiliary/util/u_threaded_context.h
>> @@ -225,20 +225,27 @@ struct tc_unflushed_batch_token;
>> /* Threshold for when to use the queue or sync. */
>> #define TC_MAX_STRING_MARKER_BYTES 512
>> /* Threshold for when to enqueue buffer/texture_subdata as-is.
>> * If the upload size is greater than this, it will do instead:
>> * - for buffers: DISCARD_RANGE is done by the threaded context
>> * - for textures: sync and call the driver directly
>> */
>> #define TC_MAX_SUBDATA_BYTES 320
>> +/* Every staging upload allocates memory. If we have too many uploads
>> + * in a row without flushes, we might run out of memory. This limit
>> controls
>> + * how many bytes of queued uploads we can have at a time. If we go over,
>> + * the threaded context triggers a context flush.
>> + */
>> +#define TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE (512 * 1024 * 1024)
>
>
> This seems very aggressive. In reality, this should probably scale with free
> space in GART, but we don't know that here. Can you reduce it to something
> like 64MB? Unless there's concrete evidence that having it higher is
> beneficial, of course.
Well the current limit is infinite. I think 64MB is too low. I would
expect games to use 2-4GB of VRAM per IB. 512MB seems reasonable for a
GTT upload limit. 512MB might not be great for small APUs, but it's
definitely not very high for mid-sized dGPUs, and context flushes can
be bad for perf.
I know that Metro 2033 is slower if I decrease the limit a lot, but I
don't remember what size I was testing.
Marek
More information about the mesa-dev
mailing list