[Mesa-dev] [PATCH 2/4] i965: Drop the system-memory temporary allocations for flush explicit.
Kenneth Graunke
kenneth at whitecape.org
Fri Mar 14 01:03:28 PDT 2014
On 02/27/2014 02:53 PM, Eric Anholt wrote:
> While in expected usage patterns nobody will ever hit this path, doubling
> our bandwidth used seems like a waste, and it cost us extra code too.
> ---
> src/mesa/drivers/dri/i965/intel_buffer_objects.c | 103 ++++++++++++-----------
> src/mesa/drivers/dri/i965/intel_buffer_objects.h | 7 +-
> 2 files changed, 58 insertions(+), 52 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> index 5bf4533..e496836 100644
> --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> @@ -409,27 +409,21 @@ intel_bufferobj_map_range(struct gl_context * ctx,
> * guarantees the driver has advertised to the application.
> */
> const unsigned alignment = ctx->Const.MinMapBufferAlignment;
> - const unsigned extra = (uintptr_t) offset % alignment;
>
> - if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
> - intel_obj->range_map_buffer[index] = _mesa_align_malloc(length + extra,
> - alignment);
> - obj->Mappings[index].Pointer =
> - intel_obj->range_map_buffer[index] + extra;
> + intel_obj->map_extra[index] = (uintptr_t) offset % alignment;
> + intel_obj->range_map_bo[index] = drm_intel_bo_alloc(brw->bufmgr,
> + "BO blit temp",
> + length +
> + intel_obj->map_extra[index],
> + alignment);
> + if (brw->has_llc) {
> + drm_intel_bo_map(intel_obj->range_map_bo[index],
> + (access & GL_MAP_WRITE_BIT) != 0);
> } else {
> - intel_obj->range_map_bo[index] = drm_intel_bo_alloc(brw->bufmgr,
> - "range map",
> - length + extra,
> - alignment);
> - if (brw->has_llc) {
> - drm_intel_bo_map(intel_obj->range_map_bo[index],
> - (access & GL_MAP_WRITE_BIT) != 0);
> - } else {
> - drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo[index]);
> - }
> - obj->Mappings[index].Pointer =
> - intel_obj->range_map_bo[index]->virtual + extra;
> + drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo[index]);
> }
> + obj->Mappings[index].Pointer =
> + intel_obj->range_map_bo[index]->virtual + intel_obj->map_extra[index];
> return obj->Mappings[index].Pointer;
> }
>
> @@ -468,35 +462,51 @@ intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
> {
> struct brw_context *brw = brw_context(ctx);
> struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
> - drm_intel_bo *temp_bo;
> + GLbitfield access = obj->Mappings[index].AccessFlags;
> +
> + assert(access & GL_MAP_FLUSH_EXPLICIT_BIT);
>
> - /* Unless we're in the range map using a temporary system buffer,
> - * there's no work to do.
> + /* If we gave a direct mapping of the buffer instead of using a temporary,
> + * then there's nothing to do.
> */
> - if (intel_obj->range_map_buffer[index] == NULL)
> + if (intel_obj->range_map_bo[index] == NULL)
> return;
>
> if (length == 0)
> return;
>
> - temp_bo = drm_intel_bo_alloc(brw->bufmgr, "range map flush", length, 64);
> -
> - /* Use obj->Pointer instead of intel_obj->range_map_buffer because the
> - * former points to the actual mapping while the latter may be offset to
> - * meet alignment guarantees.
> + /* Note that we're not unmapping our buffer while executing the blit. We
> + * need to have a mapping still at the end of this call, since the user
> + * gets to make further modifications and glFlushMappedBufferRange() calls.
> + * This is safe, because:
> + *
> + * - On LLC platforms, we're using a CPU mapping that's coherent with the
> + * GPU (except for the render caches), so the kernel doesn't need to do
> + * any flushing work for us except for what happens at batch exec time
> + * anyway.
> + *
> + * - On non-LLC platforms, we're using a GTT mapping that writes directly
> + * to system memory (except for the chipset cache that gets flushed at
> + * batch exec time).
> + *
> + * In both cases we don't need to stall for the previous blit to complete
> + * so we can re-map (and we definitely don't want to, since that would be
> + * slow): If the user edits a part of their buffer that's previously been
> + * blitted, then our lack of synchronization is fine, because either
> + * they'll get some too-new data in the first blit and not do another blit
> + * of that area (but in that case the results are undefined), or they'll do
> + * another blit of that area and the complete newer data will land the
> + * second time.
This seems reasonable to me. The ARB_map_buffer_range specification
indicates that calling FlushMappedBufferRange indicates that a
particular range needs to be flushed. But I don't see anything saying
that data /can't/ spontaneously land without a flush call. And data
definitely doesn't have to land unless a FlushMappedBufferRange call occurs.
> */
> - drm_intel_bo_subdata(temp_bo, 0, length, obj->Mappings[index].Pointer);
> -
> intel_emit_linear_blit(brw,
> intel_obj->buffer,
> obj->Mappings[index].Offset + offset,
> - temp_bo, 0,
> + intel_obj->range_map_bo[index],
> + intel_obj->map_extra[index] + offset,
> length);
> intel_bufferobj_mark_gpu_usage(intel_obj,
> obj->Mappings[index].Offset + offset,
> length);
> -
> - drm_intel_bo_unreference(temp_bo);
> }
>
>
> @@ -514,27 +524,18 @@ intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj,
>
> assert(intel_obj);
> assert(obj->Mappings[index].Pointer);
> - if (intel_obj->range_map_buffer[index] != NULL) {
> - /* Since we've emitted some blits to buffers that will (likely) be used
> - * in rendering operations in other cache domains in this batch, emit a
> - * flush. Once again, we wish for a domain tracker in libdrm to cover
> - * usage inside of a batchbuffer.
> - */
> - intel_batchbuffer_emit_mi_flush(brw);
> - _mesa_align_free(intel_obj->range_map_buffer[index]);
> - intel_obj->range_map_buffer[index] = NULL;
> - } else if (intel_obj->range_map_bo[index] != NULL) {
> - const unsigned extra = obj->Mappings[index].Pointer -
> - intel_obj->range_map_bo[index]->virtual;
> -
> + if (intel_obj->range_map_bo[index] != NULL) {
> drm_intel_bo_unmap(intel_obj->range_map_bo[index]);
>
> - intel_emit_linear_blit(brw,
> - intel_obj->buffer, obj->Mappings[index].Offset,
> - intel_obj->range_map_bo[index], extra,
> - obj->Mappings[index].Length);
> - intel_bufferobj_mark_gpu_usage(intel_obj, obj->Mappings[index].Offset,
> - obj->Mappings[index].Length);
> + if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) {
> + intel_emit_linear_blit(brw,
> + intel_obj->buffer, obj->Mappings[index].Offset,
> + intel_obj->range_map_bo[index],
> + intel_obj->map_extra[index],
> + obj->Mappings[index].Length);
> + intel_bufferobj_mark_gpu_usage(intel_obj, obj->Mappings[index].Offset,
> + obj->Mappings[index].Length);
> + }
>
> /* Since we've emitted some blits to buffers that will (likely) be used
> * in rendering operations in other cache domains in this batch, emit a
> diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
> index 2197707..b27d25f 100644
> --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h
> +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
> @@ -43,7 +43,12 @@ struct intel_buffer_object
> drm_intel_bo *buffer; /* the low-level buffer manager's buffer handle */
>
> drm_intel_bo *range_map_bo[MAP_COUNT];
> - void *range_map_buffer[MAP_COUNT];
> +
> + /**
> + * Alignment offset from the range_map_bo temporary mapping to the returned
> + * obj->Pointer (caused by GL_ARB_map_buffer_alignment).
> + */
> + unsigned map_extra[MAP_COUNT];
>
> /** @{
> * Tracking for what range of the BO may currently be in use by the GPU.
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140314/66444863/attachment-0001.pgp>
More information about the mesa-dev
mailing list