[Mesa-dev] [PATCH 5/8] i965 gen6: Initial implementation of transform feedback.
Eric Anholt
eric at anholt.net
Sat Dec 17 11:50:07 PST 2011
On Tue, 13 Dec 2011 15:35:18 -0800, Paul Berry <stereotype441 at gmail.com> wrote:
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
> index f5d5898..c323a73 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -183,7 +183,31 @@ static void populate_key( struct brw_context *brw,
> } else if (intel->gen == 6) {
> /* On Gen6, GS is used for transform feedback. */
> /* _NEW_TRANSFORM_FEEDBACK */
> - key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active;
> + if (ctx->TransformFeedback.CurrentObject->Active) {
> + const struct gl_shader_program *shaderprog =
> + ctx->Shader.CurrentVertexProgram;
> + const struct gl_transform_feedback_info *linked_xfb_info =
> + &shaderprog->LinkedTransformFeedback;
> + int i;
> +
> + /* Make sure that the VUE slots won't overflow the unsigned chars in
> + * key->transform_feedback_bindings[].
> + */
> + assert (BRW_VERT_RESULT_MAX <= 256);
We generally don't put a space between assert and the opening paren.
Also, this assert is really weird to me. Neither of the two fields in
the key are related to 256 -- there's a 7-bit field, and a
BRW_MAX_SOL_BINDINGS field. (Also, for compile-time checks like this,
we now have STATIC_ASSERT. Hooray!)
> @@ -107,12 +119,27 @@ static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c,
> * of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of
> * DWORD 2. So this function extracts the primitive type field, bitshifts it
> * appropriately, and stores it in c->reg.header.
> + *
> + * Also, if num_verts is 3, it converts primitive type
> + * _3DPRIM_TRISTRIP_REVERSE to _3DPRIM_TRISTRIP, so that odd numbered
> + * triangles in a triangle strip will render correctly.
> */
> -static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c)
> +static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c,
> + unsigned num_verts)
> {
> struct brw_compile *p = &c->func;
> brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
> brw_imm_ud(0x1f));
> + if (num_verts == 3) {
> + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,
> + get_element_ud(c->reg.header, 2),
> + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
> + {
> + brw_MOV(p, get_element_ud(c->reg.header, 2),
> + brw_imm_ud(_3DPRIM_TRISTRIP));
> + brw_set_predicate_control(p, BRW_PREDICATE_NONE);
> + }
> + }
This doesn't mess up normal rendering of tristrips in any way? It looks
like it could be a separate commit to me.
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index f9b0b71..4ec9ef5 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +/**
> + * Set up a binding table entry for use by stream output logic (transform
> + * feedback).
> + *
> + * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
> + */
> +static void
> +brw_update_sol_surface(struct brw_context *brw, drm_intel_bo *bo,
> + uint32_t *out_offset, unsigned num_vector_components,
> + unsigned stride_dwords, unsigned offset_dwords,
> + uint32_t buffer_size_minus_1)
> +{
> + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
> + out_offset);
> + uint32_t width = buffer_size_minus_1 & 0x7f;
> + uint32_t height = (buffer_size_minus_1 & 0xfff80) >> 7;
> + uint32_t depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
> + uint32_t pitch_minus_1 = 4*stride_dwords - 1;
> + uint32_t surface_format;
> + uint32_t offset_bytes = 4 * offset_dwords;
Could I get just a little more whitespace in your code? Between the
declaration and the switch, after the switch, etc.?
> + surf[1] = bo->offset + offset_bytes; /* reloc */
> + surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
> + height << BRW_SURFACE_HEIGHT_SHIFT);
> + surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
> + pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
This looks to me like the hardware width/heigth/depth fields are ending
up programmed as 1 larger than you intend. Compare to
brw_create_constant_surface().
> + /* Emit relocation to surface contents. Section 5.1.1 of the gen4
> + * bspec ("Data Cache") says that the data cache does not exist as
> + * a separate cache and is just the sampler cache.
> + */
> + drm_intel_bo_emit_reloc(brw->intel.batch.bo,
> + *out_offset + 4,
> + bo, offset_bytes,
> + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
> +}
If it's the sampler cache, that would be (I915_GEM_DOMAIN_SAMPLER, 0).
But it obviously isn't, since we're reading and writing.
> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
> new file mode 100644
> index 0000000..af372c1
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
> @@ -0,0 +1,102 @@
> +/*
> + * Copyright © 2011 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +/** \file gen6_sol.c
> + *
> + * Code to initialize the binding table entries used by transform feedback.
> + */
> +
> +#include "brw_context.h"
> +#include "intel_buffer_objects.h"
> +#include "brw_defines.h"
> +
> +static void
> +brw_update_sol_surfaces(struct brw_context *brw)
If it's gen6-only, let's call it gen6_update_sol_surfaces.
> +{
> + unsigned buffer_offsets[BRW_MAX_SOL_BUFFERS];
> + struct gl_context *ctx = &brw->intel.ctx;
> + struct intel_context *intel = &brw->intel;
> + /* _NEW_TRANSFORM_FEEDBACK */
> + struct gl_transform_feedback_object *xfb_obj =
> + ctx->TransformFeedback.CurrentObject;
> + /* BRW_NEW_VERTEX_PROGRAM */
> + const struct gl_shader_program *shaderprog =
> + ctx->Shader.CurrentVertexProgram;
> + const struct gl_transform_feedback_info *linked_xfb_info =
> + &shaderprog->LinkedTransformFeedback;
> + int i;
> + for (i = 0; i < BRW_MAX_SOL_BUFFERS; ++i)
> + buffer_offsets[i] = xfb_obj->Offset[i] / 4;
> + for (i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) {
> + const int surf_index = SURF_INDEX_SOL_BINDING(i);
If you're going to mess with indentation to get your indented code to
fit, that's often a good indication that a separate function is in
order.
The shadowed loop index made me twitch, but it's actually valid.
> + if (xfb_obj->Active && i < linked_xfb_info->NumOutputs) {
> + unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer;
> + struct gl_buffer_object *buffer_obj = xfb_obj->Buffers[buffer];
> + drm_intel_bo *bo = intel_buffer_object(buffer_obj)->buffer;
> + size_t size_dwords = buffer_obj->Size / 4;
> + unsigned num_components = linked_xfb_info->Outputs[i].NumComponents;
> + unsigned stride = linked_xfb_info->BufferStride[buffer];
> + uint32_t buffer_size_minus_1;
> + /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
> + * too big to map using a single binding table entry?
> + */
> + assert ((size_dwords - buffer_offsets[buffer]) / stride
> + <= BRW_MAX_NUM_BUFFER_ENTRIES);
> + if (size_dwords > buffer_offsets[buffer] + num_components) {
> + /* There is room for at least 1 transform feedback output in the
> + * buffer. Compute the number of additional transform feedback
> + * outputs the buffer has room for.
> + */
> + buffer_size_minus_1 =
> + (size_dwords - buffer_offsets[buffer] - num_components)
> + / stride;
> + } else {
> + /* There isn't even room for a single transform feedback output in
> + * the buffer. We can't configure the binding table entry to
> + * prevent output entirely; we'll have to rely on the geometry
> + * shader to detect overflow. But to minimize the damage in case
> + * of a bug, set up the binding table entry to just allow a single
> + * output.
> + */
> + buffer_size_minus_1 = 0;
> + }
> + intel->vtbl.update_sol_surface(
> + brw, bo, &brw->bind.surf_offset[surf_index],
> + num_components, stride, buffer_offsets[buffer],
> + buffer_size_minus_1);
> + buffer_offsets[buffer] += num_components;
> + } else {
> + brw->bind.surf_offset[surf_index] = 0;
> + }
> + }
> +}
brw_update_sol_surface() looks like it would rather live here than in
brw_wm_surface_state.c, and it shouldn't be in a vtable.
I think moving most of the logic in this loop into the
brw_update_sol_surface() function would make me happier.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 197 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20111217/5db8f2f9/attachment.pgp>
More information about the mesa-dev
mailing list