[Mesa-dev] [PATCH 1/2] i965: Massively simplify the intel_upload implementation.

Eric Anholt eric at anholt.net
Tue Mar 25 11:27:58 PDT 2014


The implementation kept a page-sized area for uploading data, and
uploaded chunks from that to a 64kb-sized streamed buffer.  This wasted
cache footprint (and extra state tracking to do so) when we want to just
write our data into the buffer immediately.

Instead, build it around an interface like brw_state_batch() that just
gets you a pointer to BO memory to upload your stuff immediately.

Improves OpenArena on HSW by 1.62209% +/- 0.355299% (n=61) and on BYT by
1.7916% +/- 0.415743% (n=31).

v2: Rebase on Mesa master, drop old prototypes.  Re-do performance
    comparison on a kernel that doesn't punish CPU efficiency
    improvements.
---
 src/mesa/drivers/dri/i965/brw_context.h          |   5 +-
 src/mesa/drivers/dri/i965/brw_draw_upload.c      |  10 +-
 src/mesa/drivers/dri/i965/intel_buffer_objects.h |  21 +--
 src/mesa/drivers/dri/i965/intel_upload.c         | 167 +++++++++--------------
 4 files changed, 77 insertions(+), 126 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 04af5d0..e119682 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1041,10 +1041,7 @@ struct brw_context
 
    struct {
       drm_intel_bo *bo;
-      GLuint offset;
-      uint32_t buffer_len;
-      uint32_t buffer_offset;
-      char buffer[4096];
+      uint32_t next_offset;
    } upload;
 
    /**
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index e261163..a579025 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -381,21 +381,17 @@ copy_array_to_vbo_array(struct brw_context *brw,
    const unsigned char *src = element->glarray->Ptr + min * src_stride;
    int count = max - min + 1;
    GLuint size = count * dst_stride;
+   uint8_t *dst = intel_upload_space(brw, size, dst_stride,
+                                     &buffer->bo, &buffer->offset);
 
    if (dst_stride == src_stride) {
-      intel_upload_data(brw, src, size, dst_stride,
-			&buffer->bo, &buffer->offset);
+      memcpy(dst, src, size);
    } else {
-      char * const map = intel_upload_map(brw, size, dst_stride);
-      char *dst = map;
-
       while (count--) {
 	 memcpy(dst, src, dst_stride);
 	 src += src_stride;
 	 dst += dst_stride;
       }
-      intel_upload_unmap(brw, map, size, dst_stride,
-			 &buffer->bo, &buffer->offset);
    }
    buffer->stride = dst_stride;
 }
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
index b27d25f..5eaf9dc 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
@@ -90,16 +90,17 @@ drm_intel_bo *intel_bufferobj_buffer(struct brw_context *brw,
                                      uint32_t size);
 
 void intel_upload_data(struct brw_context *brw,
-		       const void *ptr, GLuint size, GLuint align,
-		       drm_intel_bo **return_bo,
-		       GLuint *return_offset);
-
-void *intel_upload_map(struct brw_context *brw,
-		       GLuint size, GLuint align);
-void intel_upload_unmap(struct brw_context *brw,
-			const void *ptr, GLuint size, GLuint align,
-			drm_intel_bo **return_bo,
-			GLuint *return_offset);
+                       const void *data,
+                       uint32_t size,
+                       uint32_t alignment,
+                       drm_intel_bo **out_bo,
+                       uint32_t *out_offset);
+
+void *intel_upload_space(struct brw_context *brw,
+                         uint32_t size,
+                         uint32_t alignment,
+                         drm_intel_bo **out_bo,
+                         uint32_t *out_offset);
 
 void intel_upload_finish(struct brw_context *brw);
 
diff --git a/src/mesa/drivers/dri/i965/intel_upload.c b/src/mesa/drivers/dri/i965/intel_upload.c
index ec3109b..bb3f615 100644
--- a/src/mesa/drivers/dri/i965/intel_upload.c
+++ b/src/mesa/drivers/dri/i965/intel_upload.c
@@ -57,127 +57,84 @@ intel_upload_finish(struct brw_context *brw)
    if (!brw->upload.bo)
       return;
 
-   if (brw->upload.buffer_len) {
-      drm_intel_bo_subdata(brw->upload.bo,
-                           brw->upload.buffer_offset,
-                           brw->upload.buffer_len,
-                           brw->upload.buffer);
-      brw->upload.buffer_len = 0;
-   }
-
+   drm_intel_bo_unmap(brw->upload.bo);
    drm_intel_bo_unreference(brw->upload.bo);
    brw->upload.bo = NULL;
+   brw->upload.next_offset = 0;
 }
 
-static void
-wrap_buffers(struct brw_context *brw, GLuint size)
-{
-   intel_upload_finish(brw);
-
-   if (size < INTEL_UPLOAD_SIZE)
-      size = INTEL_UPLOAD_SIZE;
-
-   brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "upload", size, 0);
-   brw->upload.offset = 0;
-}
-
-void
-intel_upload_data(struct brw_context *brw,
-                  const void *ptr, GLuint size, GLuint align,
-                  drm_intel_bo **return_bo,
-                  GLuint *return_offset)
-{
-   GLuint base, delta;
-
-   base = ALIGN_NPOT(brw->upload.offset, align);
-   if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) {
-      wrap_buffers(brw, size);
-      base = 0;
-   }
-
-   drm_intel_bo_reference(brw->upload.bo);
-   *return_bo = brw->upload.bo;
-   *return_offset = base;
-
-   delta = base - brw->upload.offset;
-   if (brw->upload.buffer_len &&
-       brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) {
-      drm_intel_bo_subdata(brw->upload.bo,
-                           brw->upload.buffer_offset,
-                           brw->upload.buffer_len,
-                           brw->upload.buffer);
-      brw->upload.buffer_len = 0;
-   }
-
-   if (size < sizeof(brw->upload.buffer)) {
-      if (brw->upload.buffer_len == 0)
-         brw->upload.buffer_offset = base;
-      else
-         brw->upload.buffer_len += delta;
-
-      memcpy(brw->upload.buffer + brw->upload.buffer_len, ptr, size);
-      brw->upload.buffer_len += size;
-   } else {
-      drm_intel_bo_subdata(brw->upload.bo, base, size, ptr);
-   }
-
-   brw->upload.offset = base + size;
-}
-
+/**
+ * Interface for getting memory for uploading streamed data to the GPU
+ *
+ * In most cases, streamed data (for GPU state structures, for example) is
+ * uploaded through brw_state_batch(), since that interface allows relocations
+ * from the streamed space returned to other BOs.  However, that interface has
+ * the restriction that the amount of space allocated has to be "small" (see
+ * estimated_max_prim_size in brw_draw.c).
+ *
+ * This interface, on the other hand, is able to handle arbitrary sized
+ * allocation requests, though it will batch small allocations into the same
+ * BO for efficiency and reduced memory footprint.
+ *
+ * \note The returned pointer is valid only until intel_upload_finish(), which
+ * will happen at batch flush or the next
+ * intel_upload_space()/intel_upload_data().
+ *
+ * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
+ * entry, and will have a reference to the new BO containing the state on
+ * return.
+ *
+ * \param out_offset Offset within the buffer object that the data will land.
+ */
 void *
-intel_upload_map(struct brw_context *brw, GLuint size, GLuint align)
+intel_upload_space(struct brw_context *brw,
+                   uint32_t size,
+                   uint32_t alignment,
+                   drm_intel_bo **out_bo,
+                   uint32_t *out_offset)
 {
-   GLuint base, delta;
-   char *ptr;
+   uint32_t offset;
 
-   base = ALIGN_NPOT(brw->upload.offset, align);
-   if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) {
-      wrap_buffers(brw, size);
-      base = 0;
+   offset = ALIGN_NPOT(brw->upload.next_offset, alignment);
+   if (brw->upload.bo && offset + size > brw->upload.bo->size) {
+      intel_upload_finish(brw);
+      offset = 0;
    }
 
-   delta = base - brw->upload.offset;
-   if (brw->upload.buffer_len &&
-       brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) {
-      drm_intel_bo_subdata(brw->upload.bo,
-                           brw->upload.buffer_offset,
-                           brw->upload.buffer_len,
-                           brw->upload.buffer);
-      brw->upload.buffer_len = 0;
+   if (!brw->upload.bo) {
+      brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "streamed data",
+                                          MAX2(INTEL_UPLOAD_SIZE, size), 4096);
+      if (brw->has_llc)
+         drm_intel_bo_map(brw->upload.bo, true);
+      else
+         drm_intel_gem_bo_map_gtt(brw->upload.bo);
    }
 
-   if (size <= sizeof(brw->upload.buffer)) {
-      if (brw->upload.buffer_len == 0)
-         brw->upload.buffer_offset = base;
-      else
-         brw->upload.buffer_len += delta;
+   brw->upload.next_offset = offset + size;
 
-      ptr = brw->upload.buffer + brw->upload.buffer_len;
-      brw->upload.buffer_len += size;
-   } else {
-      ptr = malloc(size);
+   *out_offset = offset;
+   if (*out_bo != brw->upload.bo) {
+      drm_intel_bo_unreference(*out_bo);
+      *out_bo = brw->upload.bo;
+      drm_intel_bo_reference(brw->upload.bo);
    }
 
-   return ptr;
+   return brw->upload.bo->virtual + offset;
 }
 
+/**
+ * Handy interface to upload some data to temporary GPU memory quickly.
+ *
+ * References to this memory should not be retained across batch flushes.
+ */
 void
-intel_upload_unmap(struct brw_context *brw,
-                   const void *ptr, GLuint size, GLuint align,
-                   drm_intel_bo **return_bo,
-                   GLuint *return_offset)
+intel_upload_data(struct brw_context *brw,
+                  const void *data,
+                  uint32_t size,
+                  uint32_t alignment,
+                  drm_intel_bo **out_bo,
+                  uint32_t *out_offset)
 {
-   GLuint base;
-
-   base = ALIGN_NPOT(brw->upload.offset, align);
-   if (size > sizeof(brw->upload.buffer)) {
-      drm_intel_bo_subdata(brw->upload.bo, base, size, ptr);
-      free((void*)ptr);
-   }
-
-   drm_intel_bo_reference(brw->upload.bo);
-   *return_bo = brw->upload.bo;
-   *return_offset = base;
-
-   brw->upload.offset = base + size;
+   void *dst = intel_upload_space(brw, size, alignment, out_bo, out_offset);
+   memcpy(dst, data, size);
 }
-- 
1.9.0



More information about the mesa-dev mailing list