[Intel-gfx] [PATCH 6/6] Move vertex buffer out of gen4_static_state into new gen4_dynamic_state

Thu Oct 23 05:45:48 CEST 2008

On Wed, 2008-10-22 at 17:35 -0700, Carl Worth wrote:
> This begins the process of separating the dynamic data from the
> static data, (still to move are the surface state and binding
> table objects). The new dynamic_state is stored in a buffer
> object, so this patch restores the buffer-object-for-vertex-buffer
> functionality originally in commit 1abf4d3a7a and later reverted
> in 5c9a62a29f.
> 
> A notable difference is that this time we actually do use
> check_aperture_space to ensure things will fit, (assuming
> there's a non-empty implementation under that), so the non-GEM
> mode that enforces this be called should be happy now.

ACK on the previous patches, and comments inline on this one.

> ---
>  src/i965_render.c |   74 +++++++++++++++++++++++++++++++++++++++++++----------
>  1 files changed, 60 insertions(+), 14 deletions(-)
> 
> diff --git a/src/i965_render.c b/src/i965_render.c
> index a9d7f66..5659df4 100644
> --- a/src/i965_render.c
> +++ b/src/i965_render.c
> @@ -60,7 +60,7 @@ do { 							\
>  #endif
>  
>  #define MAX_VERTEX_PER_COMPOSITE    24
> -#define MAX_VERTEX_BUFFERS	    256
> +#define VERTEX_BUFFER_SIZE	    (256 * MAX_VERTEX_PER_COMPOSITE)

This isn't your fault (it may even be mine), but I'd love to see
MAX_VERTEX_PER_COMPOSITE get renamed.  The current value of 24 means 4
vertices * 3 texcoords/vertex * 2 floats/texcoord, i.e. not actually a
number of vertices.  Maybe MAX_FLOATS_PER_COMPOSITE or something?

>  struct blendinfo {
>      Bool dst_alpha;
> @@ -445,11 +445,16 @@ typedef struct brw_surface_state_padded {
>  /**
>   * Gen4 rendering state buffer structure.
>   *
> - * Ideally this structure would contain static data for all of the
> - * combinations of state that we use for Render acceleration, and
> - * another buffer would contain the dynamic surface state, binding
> - * table, and vertex data. We'll be moving to that organization soon,
> - * so we use that naming already.
> + * This structure contains static data for all of the combinations of
> + * state that we use for Render acceleration.
> + *
> + * Meanwhile, gen4_dynamic_state_t should contain all dynamic data,
> + * but we're still in the process of migrating some data out of
> + * gen4_static_state_t to gen4_dynamic_state_t. Things remaining to be
> + * migrated include
> + *
> + *	surface_state
> + *	binding_table
>   */
>  typedef struct _gen4_static_state {
>      uint8_t wm_scratch[128 * PS_MAX_THREADS];
> @@ -503,15 +508,19 @@ typedef struct _gen4_static_state {
>  				     [BRW_BLENDFACTOR_COUNT];
>      struct brw_cc_viewport cc_viewport;
>      PAD64 (brw_cc_viewport, 0);
> -
> -    float vb[MAX_VERTEX_PER_COMPOSITE * MAX_VERTEX_BUFFERS];
>  } gen4_static_state_t;
>  
> +typedef struct gen4_dynamic_state_state {
> +    float vb[VERTEX_BUFFER_SIZE];
> +} gen4_dynamic_state;
> +
>  /** Private data for gen4 render accel implementation. */
>  struct gen4_render_state {
>      gen4_static_state_t *static_state;
>      uint32_t static_state_offset;
>  
> +    dri_bo* dynamic_state_bo;
> +
>      int binding_table_index;
>      int surface_state_index;
>      int vb_offset;
> @@ -940,6 +949,28 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
>      int state_base_offset;
>      uint32_t src_blend, dst_blend;
>      uint32_t *binding_table;
> +    dri_bo *bo_table[1]; /* Just dynamic_state_bo for now */
> +
> +    if (render_state->dynamic_state_bo == NULL) {
> +	render_state->dynamic_state_bo = dri_bo_alloc (pI830->bufmgr, "vb",
> +						       sizeof (gen4_dynamic_state),
> +						       4096);
> +    }
> +
> +    bo_table[0] = render_state->dynamic_state_bo;

You need to include the batchbuffer in the array, otherwise the check
won't know about all the other things you're planning to load in with
this batchbuffer.

> +
> +    /* If this command won't fit in the current batch, flush. */
> +    if (dri_bufmgr_check_aperture_space (bo_table, 1) < 0)
> +	intel_batch_flush (pScrn, FALSE);
> +
> +    /* If the command still won't fit in an empty batch, then it's
> +     * just plain too big for the hardware---fallback to software.
> +     */
> +    if (dri_bufmgr_check_aperture_space (bo_table, 1) < 0) {
> +	dri_bo_unreference (render_state->dynamic_state_bo);
> +	render_state->dynamic_state_bo = NULL;
> +        return FALSE;
> +    }
>  
>      IntelEmitInvarientState(pScrn);
>      *pI830->last_3d = LAST_3D_RENDER;
> @@ -1288,11 +1319,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
>      ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
>      I830Ptr pI830 = I830PTR(pScrn);
>      struct gen4_render_state *render_state = pI830->gen4_render_state;
> -    gen4_static_state_t *static_state = render_state->static_state;
> +    gen4_dynamic_state *dynamic_state;
>      Bool has_mask;
>      Bool is_affine_src, is_affine_mask, is_affine;
>      float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
> -    float *vb = static_state->vb;
> +    float *vb;
>      int i;
>  
>      is_affine_src = i830_transform_is_affine (pI830->transform[0]);
> @@ -1369,11 +1400,23 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
>  	}
>      }
>  
> -    if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE >= ARRAY_SIZE(static_state->vb)) {
> -	i830WaitSync(pScrn);
> +    /* Arrange for a dynamic_state buffer object with sufficient space
> +     * for our vertices. */
> +    if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE > VERTEX_BUFFER_SIZE) {
> +	dri_bo_unreference (render_state->dynamic_state_bo);
> +
> +	render_state->dynamic_state_bo = dri_bo_alloc (pI830->bufmgr, "vb",
> +						       sizeof (gen4_dynamic_state),
> +						       4096);
>  	render_state->vb_offset = 0;
>      }
>  
> +    /* Map the dynamic_state buffer object so we can write to the
> +     * vertex buffer within it. */
> +    dri_bo_map (render_state->dynamic_state_bo, 1);
> +    dynamic_state = render_state->dynamic_state_bo->virtual;
> +    vb = dynamic_state->vb;
> +
>      i = render_state->vb_offset;
>      /* rect (x2,y2) */
>      vb[i++] = (float)(dstX + w);
> @@ -1416,7 +1459,9 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
>  	if (!is_affine)
>  	    vb[i++] = mask_w[0];
>      }
> -    assert (i * 4 <= sizeof(static_state->vb));
> +    assert (i <= VERTEX_BUFFER_SIZE);
> +
> +    dri_bo_unmap (render_state->dynamic_state_bo);

When you allocate a new vb here, you need to check_aperture again.
Otherwise you may end up accumulating a batchbuffer referencing a ton of
VBs so that you can't fit the whole thing in at once.  Not big deal
today (particularly since the assert that you tripped over last time is
gone), but when we've got our pixmaps in BOs you'll near the failing
point a lot more often.  Basically, just set up the array again with
your batch and your new vb, and if it's too big then flush.  After that,
you're sure that you'll be successful post-flushing and don't need to
check again.

-- 
Eric Anholt
eric at anholt.net                         eric.anholt at intel.com

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: This is a digitally signed message part
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20081022/d67b168d/attachment.sig>