[Mesa-dev] [PATCH 1/2] i965: Massively simplify the intel_upload implementation.
Kenneth Graunke
kenneth at whitecape.org
Tue Mar 25 13:54:06 PDT 2014
On 03/25/2014 11:27 AM, Eric Anholt wrote:
> The implementation kept a page-sized area for uploading data, and
> uploaded chunks from that to a 64kb-sized streamed buffer. This wasted
> cache footprint (and extra state tracking to do so) when we want to just
> write our data into the buffer immediately.
>
> Instead, build it around an interface like brw_state_batch() that just
> gets you a pointer to BO memory to upload your stuff immediately.
>
> Improves OpenArena on HSW by 1.62209% +/- 0.355299% (n=61) and on BYT by
> 1.7916% +/- 0.415743% (n=31).
>
> v2: Rebase on Mesa master, drop old prototypes. Re-do performance
> comparison on a kernel that doesn't punish CPU efficiency
> improvements.
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 5 +-
> src/mesa/drivers/dri/i965/brw_draw_upload.c | 10 +-
> src/mesa/drivers/dri/i965/intel_buffer_objects.h | 21 +--
> src/mesa/drivers/dri/i965/intel_upload.c | 167 +++++++++--------------
> 4 files changed, 77 insertions(+), 126 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 04af5d0..e119682 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1041,10 +1041,7 @@ struct brw_context
>
> struct {
> drm_intel_bo *bo;
> - GLuint offset;
> - uint32_t buffer_len;
> - uint32_t buffer_offset;
> - char buffer[4096];
> + uint32_t next_offset;
> } upload;
>
> /**
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index e261163..a579025 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -381,21 +381,17 @@ copy_array_to_vbo_array(struct brw_context *brw,
> const unsigned char *src = element->glarray->Ptr + min * src_stride;
> int count = max - min + 1;
> GLuint size = count * dst_stride;
> + uint8_t *dst = intel_upload_space(brw, size, dst_stride,
> + &buffer->bo, &buffer->offset);
>
It sure seems like these references will exist across batchbuffers,
meaning we may hold on to the upload buffer containing our vertex data
until we do copy_array_to_vbo_array on it again. That seems
unfortunate. (Or, maybe I'm wrong?)
However, I'm pretty sure the old code had that same problem, so your new
code is still a huge improvement.
Patch 1 is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
> if (dst_stride == src_stride) {
> - intel_upload_data(brw, src, size, dst_stride,
> - &buffer->bo, &buffer->offset);
> + memcpy(dst, src, size);
> } else {
> - char * const map = intel_upload_map(brw, size, dst_stride);
> - char *dst = map;
> -
> while (count--) {
> memcpy(dst, src, dst_stride);
> src += src_stride;
> dst += dst_stride;
> }
> - intel_upload_unmap(brw, map, size, dst_stride,
> - &buffer->bo, &buffer->offset);
> }
> buffer->stride = dst_stride;
> }
> diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
> index b27d25f..5eaf9dc 100644
> --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h
> +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
> @@ -90,16 +90,17 @@ drm_intel_bo *intel_bufferobj_buffer(struct brw_context *brw,
> uint32_t size);
>
> void intel_upload_data(struct brw_context *brw,
> - const void *ptr, GLuint size, GLuint align,
> - drm_intel_bo **return_bo,
> - GLuint *return_offset);
> -
> -void *intel_upload_map(struct brw_context *brw,
> - GLuint size, GLuint align);
> -void intel_upload_unmap(struct brw_context *brw,
> - const void *ptr, GLuint size, GLuint align,
> - drm_intel_bo **return_bo,
> - GLuint *return_offset);
> + const void *data,
> + uint32_t size,
> + uint32_t alignment,
> + drm_intel_bo **out_bo,
> + uint32_t *out_offset);
> +
> +void *intel_upload_space(struct brw_context *brw,
> + uint32_t size,
> + uint32_t alignment,
> + drm_intel_bo **out_bo,
> + uint32_t *out_offset);
>
> void intel_upload_finish(struct brw_context *brw);
>
> diff --git a/src/mesa/drivers/dri/i965/intel_upload.c b/src/mesa/drivers/dri/i965/intel_upload.c
> index ec3109b..bb3f615 100644
> --- a/src/mesa/drivers/dri/i965/intel_upload.c
> +++ b/src/mesa/drivers/dri/i965/intel_upload.c
> @@ -57,127 +57,84 @@ intel_upload_finish(struct brw_context *brw)
> if (!brw->upload.bo)
> return;
>
> - if (brw->upload.buffer_len) {
> - drm_intel_bo_subdata(brw->upload.bo,
> - brw->upload.buffer_offset,
> - brw->upload.buffer_len,
> - brw->upload.buffer);
> - brw->upload.buffer_len = 0;
> - }
> -
> + drm_intel_bo_unmap(brw->upload.bo);
> drm_intel_bo_unreference(brw->upload.bo);
> brw->upload.bo = NULL;
> + brw->upload.next_offset = 0;
> }
>
> -static void
> -wrap_buffers(struct brw_context *brw, GLuint size)
> -{
> - intel_upload_finish(brw);
> -
> - if (size < INTEL_UPLOAD_SIZE)
> - size = INTEL_UPLOAD_SIZE;
> -
> - brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "upload", size, 0);
> - brw->upload.offset = 0;
> -}
> -
> -void
> -intel_upload_data(struct brw_context *brw,
> - const void *ptr, GLuint size, GLuint align,
> - drm_intel_bo **return_bo,
> - GLuint *return_offset)
> -{
> - GLuint base, delta;
> -
> - base = ALIGN_NPOT(brw->upload.offset, align);
> - if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) {
> - wrap_buffers(brw, size);
> - base = 0;
> - }
> -
> - drm_intel_bo_reference(brw->upload.bo);
> - *return_bo = brw->upload.bo;
> - *return_offset = base;
> -
> - delta = base - brw->upload.offset;
> - if (brw->upload.buffer_len &&
> - brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) {
> - drm_intel_bo_subdata(brw->upload.bo,
> - brw->upload.buffer_offset,
> - brw->upload.buffer_len,
> - brw->upload.buffer);
> - brw->upload.buffer_len = 0;
> - }
> -
> - if (size < sizeof(brw->upload.buffer)) {
> - if (brw->upload.buffer_len == 0)
> - brw->upload.buffer_offset = base;
> - else
> - brw->upload.buffer_len += delta;
> -
> - memcpy(brw->upload.buffer + brw->upload.buffer_len, ptr, size);
> - brw->upload.buffer_len += size;
> - } else {
> - drm_intel_bo_subdata(brw->upload.bo, base, size, ptr);
> - }
> -
> - brw->upload.offset = base + size;
> -}
> -
> +/**
> + * Interface for getting memory for uploading streamed data to the GPU
> + *
> + * In most cases, streamed data (for GPU state structures, for example) is
> + * uploaded through brw_state_batch(), since that interface allows relocations
> + * from the streamed space returned to other BOs. However, that interface has
> + * the restriction that the amount of space allocated has to be "small" (see
> + * estimated_max_prim_size in brw_draw.c).
> + *
> + * This interface, on the other hand, is able to handle arbitrary sized
> + * allocation requests, though it will batch small allocations into the same
> + * BO for efficiency and reduced memory footprint.
> + *
> + * \note The returned pointer is valid only until intel_upload_finish(), which
> + * will happen at batch flush or the next
> + * intel_upload_space()/intel_upload_data().
> + *
> + * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
> + * entry, and will have a reference to the new BO containing the state on
> + * return.
> + *
> + * \param out_offset Offset within the buffer object that the data will land.
> + */
> void *
> -intel_upload_map(struct brw_context *brw, GLuint size, GLuint align)
> +intel_upload_space(struct brw_context *brw,
> + uint32_t size,
> + uint32_t alignment,
> + drm_intel_bo **out_bo,
> + uint32_t *out_offset)
> {
> - GLuint base, delta;
> - char *ptr;
> + uint32_t offset;
>
> - base = ALIGN_NPOT(brw->upload.offset, align);
> - if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) {
> - wrap_buffers(brw, size);
> - base = 0;
> + offset = ALIGN_NPOT(brw->upload.next_offset, alignment);
> + if (brw->upload.bo && offset + size > brw->upload.bo->size) {
> + intel_upload_finish(brw);
> + offset = 0;
> }
>
> - delta = base - brw->upload.offset;
> - if (brw->upload.buffer_len &&
> - brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) {
> - drm_intel_bo_subdata(brw->upload.bo,
> - brw->upload.buffer_offset,
> - brw->upload.buffer_len,
> - brw->upload.buffer);
> - brw->upload.buffer_len = 0;
> + if (!brw->upload.bo) {
> + brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "streamed data",
> + MAX2(INTEL_UPLOAD_SIZE, size), 4096);
> + if (brw->has_llc)
> + drm_intel_bo_map(brw->upload.bo, true);
> + else
> + drm_intel_gem_bo_map_gtt(brw->upload.bo);
> }
>
> - if (size <= sizeof(brw->upload.buffer)) {
> - if (brw->upload.buffer_len == 0)
> - brw->upload.buffer_offset = base;
> - else
> - brw->upload.buffer_len += delta;
> + brw->upload.next_offset = offset + size;
>
> - ptr = brw->upload.buffer + brw->upload.buffer_len;
> - brw->upload.buffer_len += size;
> - } else {
> - ptr = malloc(size);
> + *out_offset = offset;
> + if (*out_bo != brw->upload.bo) {
> + drm_intel_bo_unreference(*out_bo);
> + *out_bo = brw->upload.bo;
> + drm_intel_bo_reference(brw->upload.bo);
> }
>
> - return ptr;
> + return brw->upload.bo->virtual + offset;
> }
>
> +/**
> + * Handy interface to upload some data to temporary GPU memory quickly.
> + *
> + * References to this memory should not be retained across batch flushes.
> + */
> void
> -intel_upload_unmap(struct brw_context *brw,
> - const void *ptr, GLuint size, GLuint align,
> - drm_intel_bo **return_bo,
> - GLuint *return_offset)
> +intel_upload_data(struct brw_context *brw,
> + const void *data,
> + uint32_t size,
> + uint32_t alignment,
> + drm_intel_bo **out_bo,
> + uint32_t *out_offset)
> {
> - GLuint base;
> -
> - base = ALIGN_NPOT(brw->upload.offset, align);
> - if (size > sizeof(brw->upload.buffer)) {
> - drm_intel_bo_subdata(brw->upload.bo, base, size, ptr);
> - free((void*)ptr);
> - }
> -
> - drm_intel_bo_reference(brw->upload.bo);
> - *return_bo = brw->upload.bo;
> - *return_offset = base;
> -
> - brw->upload.offset = base + size;
> + void *dst = intel_upload_space(brw, size, alignment, out_bo, out_offset);
> + memcpy(dst, data, size);
> }
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140325/a661c832/attachment-0001.sig>
More information about the mesa-dev
mailing list