[Intel-gfx] Some questions about dri_bo_emit_reloc
Carl Worth
cworth at cworth.org
Mon Oct 27 23:51:21 CET 2008
These questions are mostly for Eric, but I missed my chance to ask him
when he was sitting next to me today, so I figure I might as well ask on
a public mailing list.
There's a patch below, but I'm not proposing it for review/commit yet,
(note that it's just git-diff output---no commit message yet, etc.). I'm
sure it's wrong in multiple ways, and it's guaranteed to do nothing
better than wedge your GPU. I'm including it here only for context for
some of the questions below.
1. How do I determine the correct values for the read_domains argument?
I found the list in i915_drm.h but when I'm emitting the binding_table
and surface_state objects is that RENDER or SAMPLER? How do I know?
2. Similarly, for write_domains I assume I want just 0 as I'm not asking
the GPU to write anything here?
3. The order of the arguments to dri_bo_emit_reloc is really confusing.
The names "delta" and "offset" are confusing too. Am I reading things
correctly that what we have here is basically (using "..." in place of
the read_domains and write_domains flags):
source_bo, ..., target_offset, source_offset, target_bo
If so, that's hard to remember to get right. Could we consider moving to
something with ordered pairs of (buffer_object, offset for that bo)?
Another confusing part of this is that intel_batch_emit_reloc puts the
target_bo as the first argument while dri_bo_emit_reloc has it as the
last.
4. My patch below adds buffer objects for binding_table and
surface_state but doesn't add them to my check_aperture_space call yet.
Am I correct in assuming that if I set up the binding table with relocs
to the surface_state that I only need to add the binding_table buffer
object to check_aperture_space and not the surface_state buffer object
as well?
5. Since after emitting the batch header I never again need the
binding_table and surface_state buffer objects, (unlike the
vertex_buffer), I'm not holding on to them. Instead I'm just using local
variables with dri_bo_unreference after emitting the binding_table's
reloc into the batch. Does that seem reasonable/correct?
6. Anything else obviously wrong in the patch below?
Note that I'm not yet adding relocs from the surface_state to the buffer
objects for the pixmaps (if any). I'll need to do that and then test
this with both UXA and EXA+Jesse's patch adding buffer objects for
pixmaps.
Thanks for any comments on this preliminary work,
-Carl
diff --git a/src/i965_render.c b/src/i965_render.c
index 3ebd209..f5174d9 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -453,14 +453,6 @@ typedef struct brw_surface_state_padded {
*
* This structure contains static data for all of the combinations of
* state that we use for Render acceleration.
- *
- * Meanwhile, gen4_render_state_t should contain all dynamic data,
- * but we're still in the process of migrating some data out of
- * gen4_static_state_t to gen4_render_state_t. Things remaining to be
- * migrated include
- *
- * surface_state
- * binding_table
*/
typedef struct _gen4_static_state {
uint8_t wm_scratch[128 * PS_MAX_THREADS];
@@ -494,10 +486,6 @@ typedef struct _gen4_static_state {
WM_STATE_DECL (masknoca_affine);
WM_STATE_DECL (masknoca_projective);
- uint32_t binding_table[128];
-
- struct brw_surface_state_padded surface_state[32];
-
/* Index by [src_filter][src_extend][mask_filter][mask_extend]. Two of
* the structs happen to add to 32 bytes.
*/
@@ -537,8 +525,6 @@ struct gen4_render_state {
gen4_composite_op composite_op;
- int binding_table_index;
- int surface_state_index;
int vb_offset;
int vertex_size;
};
@@ -883,20 +869,15 @@ sampler_state_extend_from_picture (int repeat_type)
}
/**
- * Sets up the common fields for a surface state buffer for the given picture
- * in the surface state buffer at index, and returns the offset within the
- * state buffer for this entry.
+ * Sets up the common fields for a surface state buffer for the given
+ * picture in the given surface state buffer.
*/
-static unsigned int
-i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
+static void
+i965_set_picture_surface_state(struct brw_surface_state *ss,
PicturePtr pPicture, PixmapPtr pPixmap,
Bool is_dst)
{
- I830Ptr pI830 = I830PTR(pScrn);
- struct gen4_render_state *render_state= pI830->gen4_render_state;
- gen4_static_state_t *static_state = render_state->static_state;
struct brw_surface_state local_ss;
- uint32_t offset;
/* Since ss is a pointer to WC memory, do all of our bit operations
* into a local temporary first.
@@ -935,11 +916,6 @@ i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
local_ss.ss3.tiled_surface = i830_pixmap_tiled(pPixmap) ? 1 : 0;
memcpy(ss, &local_ss, sizeof(local_ss));
-
- offset = (char *)ss - (char *)static_state;
- assert((offset & 31) == 0);
-
- return offset;
}
@@ -985,7 +961,6 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
{
I830Ptr pI830 = I830PTR(pScrn);
struct gen4_render_state *render_state= pI830->gen4_render_state;
- gen4_static_state_t *static_state = render_state->static_state;
gen4_composite_op *composite_op = &render_state->composite_op;
int op = composite_op->op;
PicturePtr pSrcPicture = composite_op->source_picture;
@@ -1009,6 +984,7 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
uint32_t src_blend, dst_blend;
uint32_t *binding_table;
dri_bo *bo_table[NUM_BO];
+ dri_bo *binding_table_bo, *surface_state_bo;
if (render_state->vertex_buffer_bo == NULL) {
render_state->vertex_buffer_bo = dri_bo_alloc (pI830->bufmgr, "vb",
@@ -1076,48 +1052,52 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
&src_blend, &dst_blend);
- if ((render_state->binding_table_index + 3 >=
- ARRAY_SIZE(static_state->binding_table)) ||
- (render_state->surface_state_index + 3 >=
- ARRAY_SIZE(static_state->surface_state)))
- {
- i830WaitSync(pScrn);
- render_state->binding_table_index = 0;
- render_state->surface_state_index = 0;
- render_state->vb_offset = 0;
- }
+ binding_table_bo = dri_bo_alloc (pI830->bufmgr, "binding_table",
+ 3 * sizeof (uint32_t), 4096);
+ dri_bo_map (binding_table_bo, 1);
+ binding_table = binding_table_bo->virtual;
- binding_table = static_state->binding_table +
- render_state->binding_table_index;
- ss = static_state->surface_state + render_state->surface_state_index;
- /* We only use 2 or 3 entries, but the table has to be 32-byte
- * aligned.
- */
- render_state->binding_table_index += 8;
- render_state->surface_state_index += (pMask != NULL) ? 3 : 2;
+ surface_state_bo = dri_bo_alloc (pI830->bufmgr, "surface_state",
+ 3 * sizeof (brw_surface_state_padded),
+ 4096);
+ dri_bo_map (surface_state_bo, 1);
+ ss = surface_state_bo->virtual;
/* Set up and bind the state buffer for the destination surface */
- binding_table[0] = state_base_offset +
- i965_set_picture_surface_state(pScrn,
- &ss[0].state,
- pDstPicture, pDst, TRUE);
+ i965_set_picture_surface_state(&ss[0].state,
+ pDstPicture, pDst, TRUE);
+ binding_table[0] = 0 * sizeof (brw_surface_state_padded);
+ dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_RENDER, 0,
+ 0 * sizeof (brw_surface_state_padded),
+ 0 * sizeof (uint32_t),
+ surface_state_bo);
/* Set up and bind the source surface state buffer */
- binding_table[1] = state_base_offset +
- i965_set_picture_surface_state(pScrn,
- &ss[1].state,
- pSrcPicture, pSrc, FALSE);
+ i965_set_picture_surface_state(&ss[1].state,
+ pSrcPicture, pSrc, FALSE);
+ binding_table[1] = 1 * sizeof (brw_surface_state_padded);
+ dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_RENDER, 0,
+ 1 * sizeof (brw_surface_state_padded),
+ 1 * sizeof (uint32_t),
+ surface_state_bo);
+
if (pMask) {
/* Set up and bind the mask surface state buffer */
- binding_table[2] = state_base_offset +
- i965_set_picture_surface_state(pScrn,
- &ss[2].state,
- pMaskPicture, pMask,
- FALSE);
+ i965_set_picture_surface_state(&ss[2].state,
+ pMaskPicture, pMask,
+ FALSE);
+ binding_table[2] = 2 * sizeof (brw_surface_state_padded);
+ dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_RENDER, 0,
+ 2 * sizeof (brw_surface_state_padded),
+ 2 * sizeof (uint32_t),
+ surface_state_bo);
} else {
binding_table[2] = 0;
}
+ dri_bo_unmap (binding_table_bo);
+ dri_bo_unmap (surface_state_bo);
+
src_filter = sampler_state_filter_from_picture (pSrcPicture->filter);
if (src_filter < 0)
I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter);
@@ -1197,8 +1177,7 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
- assert((((unsigned char *)binding_table - pI830->FbBase) & 31) == 0);
- OUT_BATCH((unsigned char *)binding_table - pI830->FbBase);
+ OUT_RELOC(binding_table_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
/* The drawing rectangle clipping is always on. Set it to values that
* shouldn't do any clipping.
@@ -1372,6 +1351,10 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
ErrorF("try to sync to show any errors...\n");
I830Sync(pScrn);
#endif
+
+ dri_bo_unreference (binding_table_bo);
+ dri_bo_unreference (surface_state_bo);
+
return TRUE;
}
#undef NUM_BO
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 189 bytes
Desc: This is a digitally signed message part
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20081027/774a4b21/attachment.sig>
More information about the Intel-gfx
mailing list