[Intel-gfx] [PATCH] Use buffer objects for binding table and surface-state objects.
Carl Worth
cworth at cworth.org
Tue Nov 4 02:15:51 CET 2008
Instead of having a static array for these and doing an ugly sync
every time we recycle the array, we now simply allocate short-lived
buffer objects for this dynamic state. The dri layer, in turn, can
take care of efficiently reusing objects as necessary.
On a GM965 this change was tested to improve the performance of
x11perf -aa10text from roughly 120000 to 154000 glyphs/sec.
---
src/i965_render.c | 107 ++++++++++++++++++++++------------------------------
1 files changed, 45 insertions(+), 62 deletions(-)
diff --git a/src/i965_render.c b/src/i965_render.c
index b28b2ce..d39915a 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -453,14 +453,6 @@ typedef struct brw_surface_state_padded {
*
* This structure contains static data for all of the combinations of
* state that we use for Render acceleration.
- *
- * Meanwhile, gen4_render_state_t should contain all dynamic data,
- * but we're still in the process of migrating some data out of
- * gen4_static_state_t to gen4_render_state_t. Things remaining to be
- * migrated include
- *
- * surface_state
- * binding_table
*/
typedef struct _gen4_static_state {
uint8_t wm_scratch[128 * PS_MAX_THREADS];
@@ -494,10 +486,6 @@ typedef struct _gen4_static_state {
WM_STATE_DECL (masknoca_affine);
WM_STATE_DECL (masknoca_projective);
- uint32_t binding_table[128];
-
- struct brw_surface_state_padded surface_state[32];
-
/* Index by [src_filter][src_extend][mask_filter][mask_extend]. Two of
* the structs happen to add to 32 bytes.
*/
@@ -537,8 +525,6 @@ struct gen4_render_state {
gen4_composite_op composite_op;
- int binding_table_index;
- int surface_state_index;
int vb_offset;
int vertex_size;
};
@@ -883,20 +869,15 @@ sampler_state_extend_from_picture (int repeat_type)
}
/**
- * Sets up the common fields for a surface state buffer for the given picture
- * in the surface state buffer at index, and returns the offset within the
- * state buffer for this entry.
+ * Sets up the common fields for a surface state buffer for the given
+ * picture in the given surface state buffer.
*/
-static unsigned int
-i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
+static void
+i965_set_picture_surface_state(struct brw_surface_state *ss,
PicturePtr pPicture, PixmapPtr pPixmap,
Bool is_dst)
{
- I830Ptr pI830 = I830PTR(pScrn);
- struct gen4_render_state *render_state= pI830->gen4_render_state;
- gen4_static_state_t *static_state = render_state->static_state;
struct brw_surface_state local_ss;
- uint32_t offset;
/* Since ss is a pointer to WC memory, do all of our bit operations
* into a local temporary first.
@@ -935,11 +916,6 @@ i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
local_ss.ss3.tiled_surface = i830_pixmap_tiled(pPixmap) ? 1 : 0;
memcpy(ss, &local_ss, sizeof(local_ss));
-
- offset = (char *)ss - (char *)static_state;
- assert((offset & 31) == 0);
-
- return offset;
}
@@ -985,7 +961,6 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
{
I830Ptr pI830 = I830PTR(pScrn);
struct gen4_render_state *render_state= pI830->gen4_render_state;
- gen4_static_state_t *static_state = render_state->static_state;
gen4_composite_op *composite_op = &render_state->composite_op;
int op = composite_op->op;
PicturePtr pSrcPicture = composite_op->source_picture;
@@ -1009,6 +984,7 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
uint32_t src_blend, dst_blend;
uint32_t *binding_table;
dri_bo *bo_table[NUM_BO];
+ dri_bo *binding_table_bo, *surface_state_bo;
if (render_state->vertex_buffer_bo == NULL) {
render_state->vertex_buffer_bo = dri_bo_alloc (pI830->bufmgr, "vb",
@@ -1076,48 +1052,52 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
&src_blend, &dst_blend);
- if ((render_state->binding_table_index + 3 >=
- ARRAY_SIZE(static_state->binding_table)) ||
- (render_state->surface_state_index + 3 >=
- ARRAY_SIZE(static_state->surface_state)))
- {
- i830WaitSync(pScrn);
- render_state->binding_table_index = 0;
- render_state->surface_state_index = 0;
- render_state->vb_offset = 0;
- }
+ binding_table_bo = dri_bo_alloc (pI830->bufmgr, "binding_table",
+ 3 * sizeof (uint32_t), 4096);
+ dri_bo_map (binding_table_bo, 1);
+ binding_table = binding_table_bo->virtual;
- binding_table = static_state->binding_table +
- render_state->binding_table_index;
- ss = static_state->surface_state + render_state->surface_state_index;
- /* We only use 2 or 3 entries, but the table has to be 32-byte
- * aligned.
- */
- render_state->binding_table_index += 8;
- render_state->surface_state_index += (pMask != NULL) ? 3 : 2;
+ surface_state_bo = dri_bo_alloc (pI830->bufmgr, "surface_state",
+ 3 * sizeof (brw_surface_state_padded),
+ 4096);
+ dri_bo_map (surface_state_bo, 1);
+ ss = surface_state_bo->virtual;
/* Set up and bind the state buffer for the destination surface */
- binding_table[0] = state_base_offset +
- i965_set_picture_surface_state(pScrn,
- &ss[0].state,
- pDstPicture, pDst, TRUE);
+ i965_set_picture_surface_state(&ss[0].state,
+ pDstPicture, pDst, TRUE);
+ binding_table[0] = 0 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+ dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0 * sizeof (brw_surface_state_padded),
+ 0 * sizeof (uint32_t),
+ surface_state_bo);
/* Set up and bind the source surface state buffer */
- binding_table[1] = state_base_offset +
- i965_set_picture_surface_state(pScrn,
- &ss[1].state,
- pSrcPicture, pSrc, FALSE);
+ i965_set_picture_surface_state(&ss[1].state,
+ pSrcPicture, pSrc, FALSE);
+ binding_table[1] = 1 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+ dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 1 * sizeof (brw_surface_state_padded),
+ 1 * sizeof (uint32_t),
+ surface_state_bo);
+
if (pMask) {
/* Set up and bind the mask surface state buffer */
- binding_table[2] = state_base_offset +
- i965_set_picture_surface_state(pScrn,
- &ss[2].state,
- pMaskPicture, pMask,
- FALSE);
+ i965_set_picture_surface_state(&ss[2].state,
+ pMaskPicture, pMask,
+ FALSE);
+ binding_table[2] = 2 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+ dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 2 * sizeof (brw_surface_state_padded),
+ 2 * sizeof (uint32_t),
+ surface_state_bo);
} else {
binding_table[2] = 0;
}
+ dri_bo_unmap (binding_table_bo);
+ dri_bo_unmap (surface_state_bo);
+
src_filter = sampler_state_filter_from_picture (pSrcPicture->filter);
if (src_filter < 0)
I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter);
@@ -1197,8 +1177,7 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
- assert((((unsigned char *)binding_table - pI830->FbBase) & 31) == 0);
- OUT_BATCH((unsigned char *)binding_table - pI830->FbBase);
+ OUT_RELOC(binding_table_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0);
/* The drawing rectangle clipping is always on. Set it to values that
* shouldn't do any clipping.
@@ -1372,6 +1351,10 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
ErrorF("try to sync to show any errors...\n");
I830Sync(pScrn);
#endif
+
+ dri_bo_unreference (binding_table_bo);
+ dri_bo_unreference (surface_state_bo);
+
return TRUE;
}
#undef NUM_BO
--
1.5.6.5
More information about the Intel-gfx
mailing list