[Mesa-dev] [PATCH 08/13] i965/draw: Use the real size for vertex buffers
Jason Ekstrand
jason at jlekstrand.net
Thu May 19 07:21:05 UTC 2016
Previously, we were using the size of the BO, which may be substantially
larger than the actual vertex buffer size.
---
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/brw_draw_upload.c | 52 +++++++++++++++++++++++++++-
src/mesa/drivers/dri/i965/gen8_draw_upload.c | 2 +-
3 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 76ed1de..d1d31e0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -569,6 +569,7 @@ struct brw_vertex_buffer {
/** Buffer object containing the uploaded vertex data */
drm_intel_bo *bo;
uint32_t offset;
+ uint32_t vf_upper_bound;
/** Byte stride between elements in the uploaded array */
GLuint stride;
GLuint step_rate;
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index b651fd2..2eac385 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -365,6 +365,17 @@ brw_get_vertex_surface_type(struct brw_context *brw,
}
}
+static unsigned
+attrib_vec4_size(GLenum type)
+{
+ const int type_size = _mesa_sizeof_type(type);
+
+ /* Size of a full vec4 of `type`: _mesa_sizeof_type() returns > 0 for bare
+ * GL types (times 4 components) and -1 for packed formats, all of size 4.
+ */
+ return type_size > 0 ? type_size * 4 : 4;
+}
+
static void
copy_array_to_vbo_array(struct brw_context *brw,
struct brw_vertex_element *element,
@@ -373,6 +384,7 @@ copy_array_to_vbo_array(struct brw_context *brw,
GLuint dst_stride)
{
const int src_stride = element->glarray->StrideB;
+ const unsigned vec4_size = attrib_vec4_size(element->glarray->Type);
/* If the source stride is zero, we just want to upload the current
* attribute once and set the buffer's stride to 0. There's no need
@@ -385,6 +397,7 @@ copy_array_to_vbo_array(struct brw_context *brw,
&buffer->bo, &buffer->offset);
buffer->stride = 0;
+ buffer->vf_upper_bound = vec4_size;
return;
}
@@ -404,6 +417,7 @@ copy_array_to_vbo_array(struct brw_context *brw,
}
}
buffer->stride = dst_stride;
+ buffer->vf_upper_bound = size + (vec4_size - dst_stride);
}
void
@@ -457,6 +471,7 @@ brw_prepare_vertices(struct brw_context *brw)
struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
uint32_t buffer_range_start[VERT_ATTRIB_MAX];
uint32_t buffer_range_end[VERT_ATTRIB_MAX];
+ uint32_t buffer_range_vf_end[VERT_ATTRIB_MAX];
for (i = j = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
@@ -486,6 +501,23 @@ brw_prepare_vertices(struct brw_context *brw)
}
}
+ /* This is ugly. It's completely undocumented (as far as I can tell)
+ * but based on a little reverse-engineering, it appears that the VF
+ * stage first fetches an entire vec4 and then swizzles components
+ * into the VUE. Therefore, if any part of the vec4 lies outside of
+ * the buffer's bounds, the entire vec4 is discarded and you get
+ * entirely zeros.
+ *
+ * This means that we can't actually use tight bounds for vertex
+ * buffers. Instead, we have to pad them out so that, for the last
+ * element, the whole vec4 fits. Unfortunately, this means there are
+ * a few corner cases where we don't handle ARB_robust_buffer_access
+ * 100% correctly, but they're very hard to hit and it's still safe in
+ * the sense that you shouldn't end up in someone else's buffer.
+ */
+ const unsigned vec4_size = attrib_vec4_size(glarray->Type);
+ const unsigned vf_range = range + (vec4_size - glarray->_ElementSize);
+
/* If we have a VB set to be uploaded for this buffer object
* already, reuse that VB state so that we emit fewer
* relocations.
@@ -503,6 +535,7 @@ brw_prepare_vertices(struct brw_context *brw)
buffer_range_start[k] = MIN2(buffer_range_start[k], start);
buffer_range_end[k] = MAX2(buffer_range_end[k], start + range);
+ buffer_range_vf_end[k] = MAX2(buffer_range_vf_end[k], start + vf_range);
break;
}
}
@@ -517,6 +550,7 @@ brw_prepare_vertices(struct brw_context *brw)
enabled_buffer[j] = intel_buffer;
buffer_range_start[j] = start;
buffer_range_end[j] = start + range;
+ buffer_range_vf_end[j] = start + vf_range;
input->buffer = j++;
input->offset = 0;
@@ -580,6 +614,8 @@ brw_prepare_vertices(struct brw_context *brw)
buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range);
drm_intel_bo_reference(buffer->bo);
+
+ buffer->vf_upper_bound = buffer_range_vf_end[i];
}
/* If we need to upload all the arrays, then we can trim those arrays to
@@ -604,12 +640,24 @@ brw_prepare_vertices(struct brw_context *brw)
buffer, interleaved);
buffer->offset -= delta * interleaved;
+ /* Because we just pass upload[0] in to copy_array_to_vbo_array
+ * above, it cannot provide us with the correct vf_upper_bound.
+ * Instead, we have to calculate that ourselves.
+ */
+ unsigned elem_vf_size = 0;
+
for (i = 0; i < nr_uploads; i++) {
/* Then, just point upload[i] at upload[0]'s buffer. */
upload[i]->offset =
((const unsigned char *)upload[i]->glarray->Ptr - ptr);
upload[i]->buffer = j;
+
+ unsigned vec4_size = attrib_vec4_size(upload[i]->glarray->Type);
+ elem_vf_size = MAX2(elem_vf_size, upload[i]->offset + vec4_size);
}
+ buffer->vf_upper_bound =
+ (delta + max_index - min_index) * interleaved + elem_vf_size;
+
j++;
nr_uploads = 0;
@@ -632,6 +680,7 @@ brw_prepare_vertices(struct brw_context *brw)
buffer, upload[i]->glarray->_ElementSize);
}
buffer->offset -= delta * buffer->stride;
+ buffer->vf_upper_bound += delta * buffer->stride;
buffer->step_rate = upload[i]->glarray->InstanceDivisor;
upload[i]->buffer = j++;
upload[i]->offset = 0;
@@ -773,7 +822,8 @@ brw_emit_vertices(struct brw_context *brw)
OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
for (i = 0; i < brw->vb.nr_buffers; i++) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
- EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1,
+ EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo,
+ buffer->offset + buffer->vf_upper_bound - 1,
buffer->offset, buffer->stride,
buffer->step_rate);
diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
index dce11dd..722cde6 100644
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -151,7 +151,7 @@ gen8_emit_vertices(struct brw_context *brw)
OUT_BATCH(dw0);
OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
- OUT_BATCH(buffer->bo->size);
+ OUT_BATCH(buffer->vf_upper_bound);
}
if (uses_draw_params) {
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list