[Intel-gfx] [PATCH 6/6] Move vertex buffer out of gen4_static_state into new gen4_dynamic_state
Carl Worth
cworth at cworth.org
Thu Oct 23 02:35:12 CEST 2008
This begins the process of separating the dynamic data from the
static data (still to move are the surface state and binding
table objects). The new dynamic_state is stored in a buffer
object, so this patch restores the buffer-object-for-vertex-buffer
functionality originally introduced in commit 1abf4d3a7a and
later reverted in 5c9a62a29f.
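The vertex-data path in i965_composite() then becomes a standard
map/write/unmap/relocate cycle against that buffer object. A
condensed sketch of the code in the diff below (all names are
taken from the patch itself; error handling and unrelated setup
are elided):

    gen4_dynamic_state *dynamic_state;
    float *vb;

    /* Map the buffer object and write vertex data into its vb array. */
    dri_bo_map (render_state->dynamic_state_bo, 1);
    dynamic_state = render_state->dynamic_state_bo->virtual;
    vb = dynamic_state->vb;
    /* ... fill vb[] starting at render_state->vb_offset ... */
    dri_bo_unmap (render_state->dynamic_state_bo);

    /* Point the hardware at the vertex data with a relocation
     * rather than a fixed offset into the static state buffer. */
    OUT_RELOC(render_state->dynamic_state_bo, I915_GEM_DOMAIN_VERTEX, 0,
              offsetof(gen4_dynamic_state, vb) +
              render_state->vb_offset * 4);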
A notable difference is that this time we actually do use
check_aperture_space to ensure things will fit (assuming there is
a non-empty implementation underneath), so the non-GEM mode,
which requires that this be called, should be happy now.
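The aperture handling added to i965_prepare_composite() follows
the usual check/flush/re-check/fallback pattern. A condensed
sketch of the code in the diff below (again, all names come from
the patch itself):

    dri_bo *bo_table[1];

    /* Lazily allocate the buffer object holding gen4_dynamic_state. */
    if (render_state->dynamic_state_bo == NULL)
        render_state->dynamic_state_bo =
            dri_bo_alloc (pI830->bufmgr, "vb",
                          sizeof (gen4_dynamic_state), 4096);

    bo_table[0] = render_state->dynamic_state_bo;

    /* If the buffer won't fit alongside the current batch, flush
     * and check again against an empty batch. */
    if (dri_bufmgr_check_aperture_space (bo_table, 1) < 0)
        intel_batch_flush (pScrn, FALSE);

    /* Still doesn't fit even in an empty batch: too big for the
     * hardware, so fall back to software. */
    if (dri_bufmgr_check_aperture_space (bo_table, 1) < 0) {
        dri_bo_unreference (render_state->dynamic_state_bo);
        render_state->dynamic_state_bo = NULL;
        return FALSE;
    }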
---
src/i965_render.c | 74 +++++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 60 insertions(+), 14 deletions(-)
diff --git a/src/i965_render.c b/src/i965_render.c
index a9d7f66..5659df4 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -60,7 +60,7 @@ do { \
#endif
#define MAX_VERTEX_PER_COMPOSITE 24
-#define MAX_VERTEX_BUFFERS 256
+#define VERTEX_BUFFER_SIZE (256 * MAX_VERTEX_PER_COMPOSITE)
struct blendinfo {
Bool dst_alpha;
@@ -445,11 +445,16 @@ typedef struct brw_surface_state_padded {
/**
* Gen4 rendering state buffer structure.
*
- * Ideally this structure would contain static data for all of the
- * combinations of state that we use for Render acceleration, and
- * another buffer would contain the dynamic surface state, binding
- * table, and vertex data. We'll be moving to that organization soon,
- * so we use that naming already.
+ * This structure contains static data for all of the combinations of
+ * state that we use for Render acceleration.
+ *
+ * Meanwhile, gen4_dynamic_state_t should contain all dynamic data,
+ * but we're still in the process of migrating some data out of
+ * gen4_static_state_t to gen4_dynamic_state_t. Things remaining to be
+ * migrated include
+ *
+ * surface_state
+ * binding_table
*/
typedef struct _gen4_static_state {
uint8_t wm_scratch[128 * PS_MAX_THREADS];
@@ -503,15 +508,19 @@ typedef struct _gen4_static_state {
[BRW_BLENDFACTOR_COUNT];
struct brw_cc_viewport cc_viewport;
PAD64 (brw_cc_viewport, 0);
-
- float vb[MAX_VERTEX_PER_COMPOSITE * MAX_VERTEX_BUFFERS];
} gen4_static_state_t;
+typedef struct gen4_dynamic_state {
+ float vb[VERTEX_BUFFER_SIZE];
+} gen4_dynamic_state;
+
/** Private data for gen4 render accel implementation. */
struct gen4_render_state {
gen4_static_state_t *static_state;
uint32_t static_state_offset;
+ dri_bo* dynamic_state_bo;
+
int binding_table_index;
int surface_state_index;
int vb_offset;
@@ -940,6 +949,28 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
int state_base_offset;
uint32_t src_blend, dst_blend;
uint32_t *binding_table;
+ dri_bo *bo_table[1]; /* Just dynamic_state_bo for now */
+
+ if (render_state->dynamic_state_bo == NULL) {
+ render_state->dynamic_state_bo = dri_bo_alloc (pI830->bufmgr, "vb",
+ sizeof (gen4_dynamic_state),
+ 4096);
+ }
+
+ bo_table[0] = render_state->dynamic_state_bo;
+
+ /* If this command won't fit in the current batch, flush. */
+ if (dri_bufmgr_check_aperture_space (bo_table, 1) < 0)
+ intel_batch_flush (pScrn, FALSE);
+
+ /* If the command still won't fit in an empty batch, then it's
+ * just plain too big for the hardware---fallback to software.
+ */
+ if (dri_bufmgr_check_aperture_space (bo_table, 1) < 0) {
+ dri_bo_unreference (render_state->dynamic_state_bo);
+ render_state->dynamic_state_bo = NULL;
+ return FALSE;
+ }
IntelEmitInvarientState(pScrn);
*pI830->last_3d = LAST_3D_RENDER;
@@ -1288,11 +1319,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
I830Ptr pI830 = I830PTR(pScrn);
struct gen4_render_state *render_state = pI830->gen4_render_state;
- gen4_static_state_t *static_state = render_state->static_state;
+ gen4_dynamic_state *dynamic_state;
Bool has_mask;
Bool is_affine_src, is_affine_mask, is_affine;
float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
- float *vb = static_state->vb;
+ float *vb;
int i;
is_affine_src = i830_transform_is_affine (pI830->transform[0]);
@@ -1369,11 +1400,23 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
}
}
- if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE >= ARRAY_SIZE(static_state->vb)) {
- i830WaitSync(pScrn);
+ /* Arrange for a dynamic_state buffer object with sufficient space
+ * for our vertices. */
+ if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE > VERTEX_BUFFER_SIZE) {
+ dri_bo_unreference (render_state->dynamic_state_bo);
+
+ render_state->dynamic_state_bo = dri_bo_alloc (pI830->bufmgr, "vb",
+ sizeof (gen4_dynamic_state),
+ 4096);
render_state->vb_offset = 0;
}
+ /* Map the dynamic_state buffer object so we can write to the
+ * vertex buffer within it. */
+ dri_bo_map (render_state->dynamic_state_bo, 1);
+ dynamic_state = render_state->dynamic_state_bo->virtual;
+ vb = dynamic_state->vb;
+
i = render_state->vb_offset;
/* rect (x2,y2) */
vb[i++] = (float)(dstX + w);
@@ -1416,7 +1459,9 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
if (!is_affine)
vb[i++] = mask_w[0];
}
- assert (i * 4 <= sizeof(static_state->vb));
+ assert (i <= VERTEX_BUFFER_SIZE);
+
+ dri_bo_unmap (render_state->dynamic_state_bo);
BEGIN_BATCH(12);
OUT_BATCH(MI_FLUSH);
@@ -1425,7 +1470,8 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
VB0_VERTEXDATA |
(render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
- OUT_BATCH(render_state->static_state_offset + offsetof(gen4_static_state_t, vb) +
+ OUT_RELOC(render_state->dynamic_state_bo, I915_GEM_DOMAIN_VERTEX, 0,
+ offsetof(gen4_dynamic_state, vb) +
render_state->vb_offset * 4);
OUT_BATCH(3);
OUT_BATCH(0); // ignore for VERTEXDATA, but still there
--
1.5.6.5