[Mesa-dev] [PATCH 5/5] r600g: always map uninitialized buffer range as unsynchronized

Alex Deucher alexdeucher at gmail.com
Thu Feb 28 07:26:52 PST 2013


On Thu, Feb 28, 2013 at 4:05 AM, Marek Olšák <maraeo at gmail.com> wrote:
> TF2 doesn't use CP DMA with this patch at all, so I'm not seeing any
> issue obviously. This patch is an optimization, not a workaround. It
> also doesn't affect CP DMA in any way. It only detects when a buffer
> range can be mapped without the CPU-GPU synchronization, which is the
> most efficient way of mapping a buffer (no GEM_WAIT_IDLE, no temporary
> staging resource, no copying in buffer_unmap).
>
> CP DMA is still probably not reliable with 6xx/7xx, but we won't be
> able to use TF2 anymore to test it.

Well, it's possible there are hardware issues with it, but CP DMA has
been around since r100 and from what I can tell internally, it's
pretty reliable.  I think the issue with TF2 were more circumstantial,
due to the combination of state and buffers (even streamout failed to
work correctly in those circumstances).  Maybe a env var to
selectively enable it?  I'm not pressed either way, just seems a shame
not to use it.

Alex

>
> Marek
>
> On Thu, Feb 28, 2013 at 12:46 AM, Alex Deucher <alexdeucher at gmail.com> wrote:
>> On Wed, Feb 27, 2013 at 6:11 PM, Marek Olšák <maraeo at gmail.com> wrote:
>>> Any driver can implement this simple and efficient optimization.
>>> Team Fortress 2 hits it always. The DISCARD_RANGE codepath is not even used
>>> with TF2 anymore, so we avoid a ton of useless buffer copies.
>>
>> With this patch applied are you still seeing issues any issues with CP
>> DMA?  If not maybe we can leave it enabled for 6xx/7xx.
>>
>> Alex
>>
>>> ---
>>>  src/gallium/drivers/r600/evergreen_hw_context.c |    3 +++
>>>  src/gallium/drivers/r600/evergreen_state.c      |    4 ++++
>>>  src/gallium/drivers/r600/r600.h                 |   11 +++++++++++
>>>  src/gallium/drivers/r600/r600_buffer.c          |   17 +++++++++++++++++
>>>  src/gallium/drivers/r600/r600_hw_context.c      |    6 ++++++
>>>  src/gallium/drivers/r600/r600_state_common.c    |    4 ++++
>>>  6 files changed, 45 insertions(+)
>>>
>>> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
>>> index b0f64b4..f81d7f3 100644
>>> --- a/src/gallium/drivers/r600/evergreen_hw_context.c
>>> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c
>>> @@ -241,4 +241,7 @@ void evergreen_dma_copy(struct r600_context *rctx,
>>>                 src_offset += csize << shift;
>>>                 size -= csize;
>>>         }
>>> +
>>> +       util_range_add(&rdst->valid_buffer_range, dst_offset,
>>> +                      dst_offset + size);
>>>  }
>>> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
>>> index 38f83e9..4a41c28 100644
>>> --- a/src/gallium/drivers/r600/evergreen_state.c
>>> +++ b/src/gallium/drivers/r600/evergreen_state.c
>>> @@ -1376,6 +1376,10 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
>>>          * elements. */
>>>         surf->cb_color_dim = pipe_buffer->width0;
>>>
>>> +       /* Set the buffer range the GPU will have access to: */
>>> +       util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range,
>>> +                      0, pipe_buffer->width0);
>>> +
>>>         surf->cb_color_cmask = surf->cb_color_base;
>>>         surf->cb_color_cmask_slice = 0;
>>>         surf->cb_color_fmask = surf->cb_color_base;
>>> diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
>>> index d018ebb..15196f7 100644
>>> --- a/src/gallium/drivers/r600/r600.h
>>> +++ b/src/gallium/drivers/r600/r600.h
>>> @@ -28,6 +28,7 @@
>>>
>>>  #include "../../winsys/radeon/drm/radeon_winsys.h"
>>>  #include "util/u_double_list.h"
>>> +#include "util/u_range.h"
>>>  #include "util/u_transfer.h"
>>>
>>>  #define R600_ERR(fmt, args...) \
>>> @@ -50,6 +51,16 @@ struct r600_resource {
>>>
>>>         /* Resource state. */
>>>         unsigned                        domains;
>>> +
>>> +       /* The buffer range which is initialized (with a write transfer,
>>> +        * streamout, DMA, or as a random access target). The rest of
>>> +        * the buffer is considered invalid and can be mapped unsynchronized.
>>> +        *
>>> +        * This allows unsychronized mapping of a buffer range which hasn't
>>> +        * been used yet. It's for applications which forget to use
>>> +        * the unsynchronized map flag and expect the driver to figure it out.
>>> +         */
>>> +       struct util_range               valid_buffer_range;
>>>  };
>>>
>>>  #define R600_BLOCK_MAX_BO              32
>>> diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
>>> index da6e529..8080611 100644
>>> --- a/src/gallium/drivers/r600/r600_buffer.c
>>> +++ b/src/gallium/drivers/r600/r600_buffer.c
>>> @@ -34,6 +34,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
>>>  {
>>>         struct r600_resource *rbuffer = r600_resource(buf);
>>>
>>> +       util_range_destroy(&rbuffer->valid_buffer_range);
>>>         pb_reference(&rbuffer->buf, NULL);
>>>         FREE(rbuffer);
>>>  }
>>> @@ -98,6 +99,14 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
>>>
>>>         assert(box->x + box->width <= resource->width0);
>>>
>>> +       /* See if the buffer range being mapped has never been initialized,
>>> +        * in which case it can be mapped unsynchronized. */
>>> +       if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
>>> +           usage & PIPE_TRANSFER_WRITE &&
>>> +           !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
>>> +               usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
>>> +       }
>>> +
>>>         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
>>>             !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
>>>                 assert(usage & PIPE_TRANSFER_WRITE);
>>> @@ -182,6 +191,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
>>>  {
>>>         struct r600_context *rctx = (struct r600_context*)pipe;
>>>         struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
>>> +       struct r600_resource *rbuffer = r600_resource(transfer->resource);
>>>
>>>         if (rtransfer->staging) {
>>>                 struct pipe_resource *dst, *src;
>>> @@ -207,6 +217,11 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
>>>                 }
>>>                 pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
>>>         }
>>> +
>>> +       if (transfer->usage & PIPE_TRANSFER_WRITE) {
>>> +               util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
>>> +                              transfer->box.x + transfer->box.width);
>>> +       }
>>>         util_slab_free(&rctx->pool_transfers, transfer);
>>>  }
>>>
>>> @@ -263,6 +278,7 @@ bool r600_init_resource(struct r600_screen *rscreen,
>>>
>>>         res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
>>>         res->domains = domains;
>>> +       util_range_set_empty(&res->valid_buffer_range);
>>>         return true;
>>>  }
>>>
>>> @@ -279,6 +295,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
>>>         pipe_reference_init(&rbuffer->b.b.reference, 1);
>>>         rbuffer->b.b.screen = screen;
>>>         rbuffer->b.vtbl = &r600_buffer_vtbl;
>>> +       util_range_init(&rbuffer->valid_buffer_range);
>>>
>>>         if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) {
>>>                 FREE(rbuffer);
>>> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
>>> index 450f6ac..677c6fc 100644
>>> --- a/src/gallium/drivers/r600/r600_hw_context.c
>>> +++ b/src/gallium/drivers/r600/r600_hw_context.c
>>> @@ -1143,6 +1143,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
>>>
>>>         /* Invalidate the read caches. */
>>>         rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
>>> +
>>> +       util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
>>> +                      dst_offset + size);
>>>  }
>>>
>>>  void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw)
>>> @@ -1189,4 +1192,7 @@ void r600_dma_copy(struct r600_context *rctx,
>>>                 src_offset += csize << shift;
>>>                 size -= csize;
>>>         }
>>> +
>>> +       util_range_add(&rdst->valid_buffer_range, dst_offset,
>>> +                      dst_offset + size);
>>>  }
>>> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
>>> index 8906695..69c6be1 100644
>>> --- a/src/gallium/drivers/r600/r600_state_common.c
>>> +++ b/src/gallium/drivers/r600/r600_state_common.c
>>> @@ -967,6 +967,7 @@ r600_create_so_target(struct pipe_context *ctx,
>>>  {
>>>         struct r600_context *rctx = (struct r600_context *)ctx;
>>>         struct r600_so_target *t;
>>> +       struct r600_resource *rbuffer = (struct r600_resource*)buffer;
>>>
>>>         t = CALLOC_STRUCT(r600_so_target);
>>>         if (!t) {
>>> @@ -986,6 +987,9 @@ r600_create_so_target(struct pipe_context *ctx,
>>>         pipe_resource_reference(&t->b.buffer, buffer);
>>>         t->b.buffer_offset = buffer_offset;
>>>         t->b.buffer_size = buffer_size;
>>> +
>>> +       util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
>>> +                      buffer_offset + buffer_size);
>>>         return &t->b;
>>>  }
>>>
>>> --
>>> 1.7.10.4
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list