On 22 December 2011 16:54, Eric Anholt <span dir="ltr"><<a href="mailto:eric@anholt.net" target="_blank">eric@anholt.net</a>></span> wrote:<br><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Fixes almost all of the transform feedback piglit tests. Remaining<br>
are a few tests related to tessellation for<br>
quads/trifans/tristrips/polygons with flat shading.<br>
---<br>
src/mesa/drivers/dri/i965/gen7_sol_state.c | 199 ++++++++++++++++++++++++++-<br>
1 files changed, 191 insertions(+), 8 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c<br>
index 650f625..a5e28b6 100644<br>
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c<br>
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c<br>
@@ -32,31 +32,214 @@<br>
#include "brw_state.h"<br>
#include "brw_defines.h"<br>
#include "intel_batchbuffer.h"<br>
+#include "intel_buffer_objects.h"<br>
<br>
static void<br>
-upload_sol_state(struct brw_context *brw)<br>
+upload_3dstate_so_buffers(struct brw_context *brw)<br>
+{<br>
+ struct intel_context *intel = &brw->intel;<br>
+ struct gl_context *ctx = &intel->ctx;<br>
+ /* BRW_NEW_VERTEX_PROGRAM */<br>
+ const struct gl_shader_program *vs_prog =<br>
+ ctx->Shader.CurrentVertexProgram;<br>
+ const struct gl_transform_feedback_info *linked_xfb_info =<br>
+ &vs_prog->LinkedTransformFeedback;<br>
+ struct gl_transform_feedback_object *xfb_obj =<br>
+ ctx->TransformFeedback.CurrentObject;<br></blockquote><div><br>Can we have a "/* NEW_TRANSFORM_FEEDBACK */" comment here?<br> </div><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ int i;<br>
+<br>
+ /* Set up the up to 4 output buffers. These are the ranges defined in the<br>
+ * gl_transform_feedback_object.<br>
+ */<br>
+ for (i = 0; i < 4; i++) {<br>
+ struct gl_buffer_object *bufferobj = xfb_obj->Buffers[i];<br>
+ drm_intel_bo *bo;<br>
+ uint32_t start, end;<br>
+<br>
+ if (!xfb_obj->Buffers[i]) {<br>
+ /* The pitch of 0 in this command indicates that the buffer is<br>
+ * unbound and won't be written to.<br>
+ */<br>
+ BEGIN_BATCH(4);<br>
+ OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));<br>
+ OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));<br>
+ OUT_BATCH(0);<br>
+ OUT_BATCH(0);<br>
+ ADVANCE_BATCH();<br>
+<br>
+ continue;<br>
+ }<br>
+<br>
+ bo = intel_buffer_object(bufferobj)->buffer;<br>
+<br>
+ start = xfb_obj->Offset[i];<br>
+ assert(start % 4 == 0);<br>
+ end = ALIGN(start + xfb_obj->Size[i], 4);<br>
+ assert(end <= bo->size);<br>
+<br>
+ BEGIN_BATCH(4);<br>
+ OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));<br>
+ OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT) |<br>
+ ((linked_xfb_info->BufferStride[i] * 4) <<<br>
+ SO_BUFFER_PITCH_SHIFT));<br></blockquote><div><br>It looks like we're not setting "SO Buffer Object Control State". Is that ok? I'm not too familiar with memory object control states so I'm not sure, but it seemed to me that it might be sensible to mark the stream output as L3 cacheable.<br>
</div><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);<br>
+ OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, end);<br>
+ ADVANCE_BATCH();<br>
+ }<br>
+}<br>
+<br>
+/**<br>
+ * Outputs the 3DSTATE_SO_DECL_LIST command.<br>
+ *<br>
+ * The data output is a series of 64-bit entries containing a SO_DECL per<br>
+ * stream. We only have one stream of rendering coming out of the GS unit, so<br>
+ * we only emit stream 0 (low 16 bits) SO_DECLs.<br>
+ */<br>
+static void<br>
+upload_3dstate_so_decl_list(struct brw_context *brw,<br>
+ struct brw_vue_map *vue_map)<br>
+{<br>
+ struct intel_context *intel = &brw->intel;<br>
+ struct gl_context *ctx = &intel->ctx;<br>
+ /* BRW_NEW_VERTEX_PROGRAM */<br>
+ const struct gl_shader_program *vs_prog =<br>
+ ctx->Shader.CurrentVertexProgram;<br>
+ /* NEW_TRANSFORM_FEEDBACK */<br>
+ const struct gl_transform_feedback_info *linked_xfb_info =<br>
+ &vs_prog->LinkedTransformFeedback;<br>
+ int i;<br>
+ uint16_t so_decl[128];<br></blockquote><div><br>Can we add an assertion to verify that there is no danger of overflowing this array? I think STATIC_ASSERT(ARRAY_SIZE(so_decl) >= MAX_PROGRAM_OUTPUTS) ought to do the trick.<br>
</div><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ int buffer_mask = 0;<br>
+ int next_offset[4] = {0, 0, 0, 0}; <br></blockquote><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ /* Construct the list of SO_DECLs to be emitted. The formatting of the<br>
+ * command feels strange -- each dword pair contains a SO_DECL per stream.<br>
+ */<br>
+ for (i = 0; i < linked_xfb_info->NumOutputs; i++) {<br>
+ int buffer = linked_xfb_info->Outputs[i].OutputBuffer;<br>
+ uint16_t decl = 0;<br>
+ int vert_result = linked_xfb_info->Outputs[i].OutputRegister;<br>
+<br>
+ buffer_mask |= 1 << buffer;<br>
+<br>
+ decl |= buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;<br>
+ decl |= vue_map->vert_result_to_slot[vert_result] <<<br>
+ SO_DECL_REGISTER_INDEX_SHIFT;<br>
+ decl |= ((1 << linked_xfb_info->Outputs[i].NumComponents) - 1) <<<br>
+ SO_DECL_COMPONENT_MASK_SHIFT;<br>
+<br>
+ /* FINISHME */<br>
+ assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);<br></blockquote><div><br>FYI, this assertion should hold true until we implement ARB_transform_feedback3 (which allows holes in the transform feedback structure). I think Marek has some plans to implement that for Gallium (not sure of his timeframe though), so we may want to keep an eye out.<br>
</div><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ next_offset[buffer] += linked_xfb_info->Outputs[i].NumComponents;<br>
+<br>
+ so_decl[i] = decl;<br>
+ }<br>
+<br>
+ BEGIN_BATCH(linked_xfb_info->NumOutputs * 2 + 3);<br>
+ OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 |<br>
+ (linked_xfb_info->NumOutputs * 2 + 1));<br>
+<br>
+ OUT_BATCH((buffer_mask << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |<br>
+ (0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |<br>
+ (0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |<br>
+ (0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));<br>
+<br>
+ OUT_BATCH((linked_xfb_info->NumOutputs << SO_NUM_ENTRIES_0_SHIFT) |<br>
+ (0 << SO_NUM_ENTRIES_1_SHIFT) |<br>
+ (0 << SO_NUM_ENTRIES_2_SHIFT) |<br>
+ (0 << SO_NUM_ENTRIES_3_SHIFT));<br>
+<br>
+ for (i = 0; i < linked_xfb_info->NumOutputs; i++) {<br>
+ OUT_BATCH(so_decl[i]);<br>
+ OUT_BATCH(0);<br>
+ }<br>
+<br>
+ ADVANCE_BATCH();<br>
+}<br>
+<br>
+static void<br>
+upload_3dstate_streamout(struct brw_context *brw, bool active,<br>
+ struct brw_vue_map *vue_map)<br>
{<br>
struct intel_context *intel = &brw->intel;<br>
struct gl_context *ctx = &intel->ctx;<br>
- uint32_t dw1 = 0;<br>
+ /* _NEW_TRANSFORM_FEEDBACK */<br>
+ struct gl_transform_feedback_object *xfb_obj =<br>
+ ctx->TransformFeedback.CurrentObject;<br>
+ uint32_t dw1 = 0, dw2 = 0;<br>
+ int i;<br>
<br>
/* _NEW_RASTERIZER_DISCARD */<br>
if (ctx->RasterDiscard)<br>
dw1 |= SO_RENDERING_DISABLE;<br>
<br>
- /* Disable the SOL stage */<br>
+ if (active) {<br>
+ int urb_entry_read_offset = 0;<br>
+ int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -<br>
+ urb_entry_read_offset;<br>
+<br>
+ dw1 |= SO_FUNCTION_ENABLE;<br>
+ dw1 |= SO_STATISTICS_ENABLE;<br></blockquote><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ for (i = 0; i < 4; i++) {<br>
+ if (xfb_obj->Buffers[i]) {<br>
+ dw1 |= SO_BUFFER_ENABLE_0 << i; <br></blockquote><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ }<br>
+ }<br>
+<br>
+ /* We always read the whole vertex. This could be reduced at some<br>
+ * point by reading less and offsetting the register index in the<br>
+ * SO_DECLs.<br>
+ */<br>
+ dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT;<br>
+ dw2 |= (urb_entry_read_length - 1) <<<br>
+ SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;<br>
+ }<br>
+<br>
BEGIN_BATCH(3);<br>
OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));<br>
- OUT_BATCH(0);<br>
- OUT_BATCH(0);<br>
+ OUT_BATCH(dw1);<br>
+ OUT_BATCH(dw2);<br>
ADVANCE_BATCH();<br>
}<br>
<br>
+static void<br>
+upload_sol_state(struct brw_context *brw)<br>
+{<br>
+ struct intel_context *intel = &brw->intel;<br>
+ struct gl_context *ctx = &intel->ctx;<br>
+ /* _NEW_TRANSFORM_FEEDBACK */<br>
+ struct gl_transform_feedback_object *xfb_obj =<br>
+ ctx->TransformFeedback.CurrentObject;<br>
+ bool active = xfb_obj->Active && !xfb_obj->Paused;<br>
+ struct brw_vue_map vue_map;<br>
+<br>
+ /* _NEW_TRANSFORM, CACHE_NEW_VS_PROG */<br>
+ brw_compute_vue_map(&vue_map, intel, ctx->Transform.ClipPlanesEnabled != 0,<br>
+ brw->vs.prog_data->outputs_written);<br>
+<br>
+ if (active) {<br>
+ upload_3dstate_so_buffers(brw);<br>
+ upload_3dstate_so_decl_list(brw, &vue_map);<br>
+ }<br>
+<br>
+ /* Finally, set up the SOL stage. This command must always follow updates to<br>
+ * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or<br>
+ * MMIO register updates (currently performed by the kernel at each batch<br>
+ * emit).<br>
+ */<br>
+ upload_3dstate_streamout(brw, active, &vue_map);<br>
+}<br>
+<br>
const struct brw_tracked_state gen7_sol_state = {<br>
.dirty = {<br>
- .mesa = _NEW_RASTERIZER_DISCARD,<br>
- .brw = BRW_NEW_BATCH,<br>
- .cache = 0,<br>
+ .mesa = (_NEW_RASTERIZER_DISCARD |<br>
+ _NEW_TRANSFORM_FEEDBACK |<br>
+ _NEW_TRANSFORM),<br>
+ .brw = (BRW_NEW_BATCH |<br>
+ BRW_NEW_VERTEX_PROGRAM),<br>
+ .cache = CACHE_NEW_VS_PROG,<br>
},<br>
.emit = upload_sol_state,<br>
};<br>
<span><font color="#888888">--<br>
1.7.7.3<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br>