mesa: Branch 'index-swtnl-0.1' - 5 commits
Keith Whitwell
keithw at kemper.freedesktop.org
Wed Mar 14 19:07:21 UTC 2007
src/mesa/drivers/dri/i915tex/i830_vtbl.c | 2
src/mesa/drivers/dri/i915tex/i915_context.h | 12
src/mesa/drivers/dri/i915tex/i915_state.c | 2
src/mesa/drivers/dri/i915tex/i915_vtbl.c | 366 +++++++++++++++--------
src/mesa/drivers/dri/i915tex/intel_batchbuffer.c | 128 ++++++--
src/mesa/drivers/dri/i915tex/intel_batchbuffer.h | 91 ++++-
src/mesa/drivers/dri/i915tex/intel_context.c | 2
src/mesa/drivers/dri/i915tex/intel_tris.c | 17 -
8 files changed, 446 insertions(+), 174 deletions(-)
New commits:
diff-tree c5cf7073859dd91e3ff6a2a693eb347faf76dd49 (from b806180a2a024b4b7a4e3fe32ea33773160ef054)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date: Wed Mar 14 19:00:11 2007 +0000
Turn off lots of debug. Enable dynamic indirect state.
diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
index bb9c60f..49c0142 100644
--- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
@@ -165,11 +165,6 @@ static GLuint emit_indirect(struct intel
GLuint delta;
GLuint segment;
- if (!state) {
- segment = 0;
- goto out;
- }
-
switch (flag) {
case LI0_STATE_DYNAMIC_INDIRECT:
segment = SEGMENT_DYNAMIC_INDIRECT;
@@ -180,11 +175,12 @@ static GLuint emit_indirect(struct intel
* like.
*/
delta = ((intel->batch->segment_finish_offset[segment] + size - 4) |
- DIS0_BUFFER_VALID |
- DIS0_BUFFER_RESET);
+ DIS0_BUFFER_VALID |
+ DIS0_BUFFER_RESET);
+
BEGIN_BATCH(2,0);
- OUT_BATCH( _3DSTATE_LOAD_INDIRECT | flag | (1<<14));
+ OUT_BATCH( _3DSTATE_LOAD_INDIRECT | flag | (1<<14) | 0);
OUT_RELOC( intel->batch->buffer,
DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
@@ -215,7 +211,6 @@ static GLuint emit_indirect(struct intel
break;
}
- out:
{
GLuint offset = intel->batch->segment_finish_offset[segment];
intel->batch->segment_finish_offset[segment] += size;
@@ -223,23 +218,11 @@ static GLuint emit_indirect(struct intel
if (state != NULL)
memcpy(intel->batch->map + offset, state, size);
- _mesa_printf("returning offset 0x%x\n", offset);
return offset;
}
}
-static void emit(struct intel_context *intel,
- const GLuint *state, GLuint size )
-{
- GLint i;
-
- BEGIN_BATCH( size/4, 0 );
- for (i = 0; i < size/4; i++)
- OUT_BATCH( state[i] );
- ADVANCE_BATCH( );
-}
-
static void
i915_emit_invarient_state(struct intel_context *intel)
{
@@ -313,19 +296,9 @@ i915_emit_invarient_state(struct intel_c
(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0)
};
-
- /* Do this once for initialization. Not really needed if we do
- * other indirect state later.
- */
-#if 0
- BEGIN_BATCH(2, 0);
- OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
-#endif
emit_indirect( intel,
- LI0_STATE_STATIC_INDIRECT,
+ LI0_STATE_STATIC_INDIRECT,
invarient_state,
sizeof(invarient_state) );
}
@@ -386,7 +359,7 @@ get_state_size(struct i915_hw_state *sta
}
#define OUT(x) do { \
- _mesa_printf("OUT(0x%08x)\n", x); \
+ if (0) _mesa_printf("OUT(0x%08x)\n", x); \
*p++ = (x); \
} while(0)
@@ -424,13 +397,16 @@ i915_emit_state(struct intel_context *in
* restart.
*/
if (dirty & (I915_UPLOAD_INVARIENT | I915_UPLOAD_BUFFERS)) {
- fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
+ if (INTEL_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
+
i915_emit_invarient_state(intel);
- fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
+ if (INTEL_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
- /* This needs to go in dynamic indirect state, once that is
- * working...
+ /* Does this go in dynamic indirect state, or static indirect
+ * state???
*/
BEGIN_BATCH(3, 0);
OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
@@ -458,7 +434,7 @@ i915_emit_state(struct intel_context *in
ADVANCE_BATCH();
#if 0
- /* What happens to scissor?
+ /* Where does scissor go?
*/
OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
@@ -468,8 +444,11 @@ i915_emit_state(struct intel_context *in
}
if (dirty & I915_UPLOAD_CTX) {
- fprintf(stderr, "I915_UPLOAD_CTX:\n");
+ if (INTEL_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "I915_UPLOAD_CTX:\n");
+ /* Immediate state: always goes in the batchbuffer.
+ */
BEGIN_BATCH(5, 0);
OUT_BATCH(state->Ctx[I915_CTXREG_LI]);
OUT_BATCH(state->Ctx[I915_CTXREG_LIS2]);
@@ -477,20 +456,11 @@ i915_emit_state(struct intel_context *in
OUT_BATCH(state->Ctx[I915_CTXREG_LIS5]);
OUT_BATCH(state->Ctx[I915_CTXREG_LIS6]);
ADVANCE_BATCH();
-
-#if 0
+
emit_indirect(intel,
LI0_STATE_DYNAMIC_INDIRECT,
state->Ctx + I915_CTXREG_STATE4,
4 * sizeof(GLuint) );
-#else
- BEGIN_BATCH(4, 0);
- OUT_BATCH(state->Ctx[I915_CTXREG_STATE4]);
- OUT_BATCH(state->Ctx[I915_CTXREG_IAB]);
- OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR0]);
- OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR1]);
- ADVANCE_BATCH();
-#endif
}
@@ -506,11 +476,14 @@ i915_emit_state(struct intel_context *in
if (dirty & I915_UPLOAD_TEX(i))
nr++;
+ /* A bit of a nasty kludge so that we can setup the relocation
+ * information for the buffer address in the indirect state
+ * packet:
+ */
offset = emit_indirect(intel,
LI0_STATE_MAP,
NULL,
(2 + nr * 3) * sizeof(GLuint) );
-
p = (GLuint *)(intel->batch->map + offset);
@@ -540,7 +513,9 @@ i915_emit_state(struct intel_context *in
- fprintf(stderr, "UPLOAD SAMPLERS:\n");
+ if (INTEL_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "UPLOAD SAMPLERS:\n");
+
offset = emit_indirect(intel,
LI0_STATE_SAMPLER,
NULL,
@@ -562,16 +537,13 @@ i915_emit_state(struct intel_context *in
}
if (dirty & I915_UPLOAD_PROGRAM) {
- fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
+ if (INTEL_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
assert((state->Program[0] & 0x1ff) + 2 == state->ProgramSize);
-#if 1
emit_indirect(intel, LI0_STATE_PROGRAM,
state->Program, state->ProgramSize * sizeof(GLuint));
-#else
- emit(intel, state->Program, state->ProgramSize * sizeof(GLuint));
-#endif
if (INTEL_DEBUG & DEBUG_STATE)
i915_disassemble_program(state->Program, state->ProgramSize);
@@ -579,13 +551,11 @@ i915_emit_state(struct intel_context *in
if (dirty & I915_UPLOAD_CONSTANTS) {
- fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
-#if 1
+ if (INTEL_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
+
emit_indirect(intel, LI0_STATE_CONSTANTS,
state->Constant, state->ConstantSize * sizeof(GLuint));
-#else
- emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
-#endif
}
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
index acac170..dd6e416 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
@@ -235,11 +235,13 @@ do_flush_locked(struct intel_batchbuffer
struct buffer_reloc *r = &batch->reloc[i];
ptr[r->offset / 4] = driBOOffset(r->buf) + r->delta;
- _mesa_printf("reloc offset %x value 0x%x + 0x%x\n",
- r->offset, driBOOffset(r->buf), r->delta);
+
+ if (INTEL_DEBUG & DEBUG_BATCH)
+ _mesa_printf("reloc offset %x value 0x%x + 0x%x\n",
+ r->offset, driBOOffset(r->buf), r->delta);
}
-/* if (INTEL_DEBUG & DEBUG_BATCH) */
+ if (INTEL_DEBUG & DEBUG_BATCH)
intel_dump_batchbuffer(batch, ptr);
driBOUnmap(batch->buffer);
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
index 6bbbf33..7bf705e 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
@@ -147,29 +147,29 @@ intel_batchbuffer_require_space(struct i
#define BEGIN_BATCH_SEGMENT(seg, n, flags) do { \
assert(!intel->prim.flush); \
intel_batchbuffer_require_space(intel->batch, seg, (n)*4, flags); \
- _mesa_printf("BEGIN_BATCH(%d,%d,%d) in %s\n", seg, n, flags, __FUNCTION__); \
+ if (0) _mesa_printf("BEGIN_BATCH(%d,%d,%d) in %s\n", seg, n, flags, __FUNCTION__); \
} while (0)
#define OUT_BATCH_SEGMENT(seg, d) do { \
- _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, d); \
+ if (0) _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, d); \
intel_batchbuffer_emit_dword(intel->batch, seg, d); \
} while (0)
#define OUT_BATCH_F_SEGMENT(seg, fl) do { \
fi_type fi; \
fi.f = fl; \
- _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, fi.i); \
+ if (0) _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, fi.i); \
intel_batchbuffer_emit_dword(intel->batch, seg, fi.i); \
} while (0)
#define OUT_RELOC_SEGMENT(seg, buf,flags,mask,delta) do { \
assert((delta) >= 0); \
- _mesa_printf("OUT_RELOC( seg %d buf %p offset %x )\n", seg, buf, delta); \
+ if (0) _mesa_printf("OUT_RELOC( seg %d buf %p offset %x )\n", seg, buf, delta); \
intel_batchbuffer_emit_reloc(intel->batch, seg, buf, flags, mask, delta); \
} while (0)
#define ADVANCE_BATCH_SEGMENT(seg) do { \
- _mesa_printf("ADVANCE_BATCH()\n"); \
+ if (0) _mesa_printf("ADVANCE_BATCH()\n"); \
} while(0)
diff-tree b806180a2a024b4b7a4e3fe32ea33773160ef054 (from 8b4f4abc391dc8881b5c01b528a9794c4b04590a)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date: Wed Mar 14 18:58:55 2007 +0000
Fix off-by-one in LOAD_IMMEDIATE packet size
diff --git a/src/mesa/drivers/dri/i915tex/i915_state.c b/src/mesa/drivers/dri/i915tex/i915_state.c
index 78ae4bd..1fafadc 100644
--- a/src/mesa/drivers/dri/i915tex/i915_state.c
+++ b/src/mesa/drivers/dri/i915tex/i915_state.c
@@ -859,7 +859,7 @@ i915_init_packets(struct i915_context *i
i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(2) |
I1_LOAD_S(4) |
- I1_LOAD_S(5) | I1_LOAD_S(6) | (4));
+ I1_LOAD_S(5) | I1_LOAD_S(6) | (3));
i915->state.Ctx[I915_CTXREG_LIS2] = 0;
i915->state.Ctx[I915_CTXREG_LIS4] = 0;
i915->state.Ctx[I915_CTXREG_LIS5] = 0;
diff-tree 8b4f4abc391dc8881b5c01b528a9794c4b04590a (from 61c7591f24abe67ee4148d16ca30b9c6d5b6b4c6)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date: Wed Mar 14 15:40:11 2007 +0000
Emit sampler and map indirect state.
diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
index e2f271a..bb9c60f 100644
--- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
@@ -157,15 +157,18 @@ i915_check_vertex_size(struct intel_cont
return sz == expected;
}
-static GLuint *emit_indirect(struct intel_context *intel,
- GLuint flag,
- const GLuint *state,
- GLuint size )
+static GLuint emit_indirect(struct intel_context *intel,
+ GLuint flag,
+ const GLuint *state,
+ GLuint size )
{
- GLint i;
GLuint delta;
GLuint segment;
- GLuint *ptr;
+
+ if (!state) {
+ segment = 0;
+ goto out;
+ }
switch (flag) {
case LI0_STATE_DYNAMIC_INDIRECT:
@@ -211,17 +214,23 @@ static GLuint *emit_indirect(struct inte
break;
}
-
- /* Now emit the indirect state. XXX: better not flush!
- */
- BEGIN_BATCH_SEGMENT( segment, size/4, 0 );
- for (i = 0; i < size/4; i++)
- OUT_BATCH_SEGMENT( segment, state[i] );
- ADVANCE_BATCH_SEGMENT( segment );
+
+ out:
+ {
+ GLuint offset = intel->batch->segment_finish_offset[segment];
+ intel->batch->segment_finish_offset[segment] += size;
+
+ if (state != NULL)
+ memcpy(intel->batch->map + offset, state, size);
+
+ _mesa_printf("returning offset 0x%x\n", offset);
+ return offset;
+ }
}
-static void emit(struct intel_context *intel, const GLuint *state, GLuint size )
+static void emit(struct intel_context *intel,
+ const GLuint *state, GLuint size )
{
GLint i;
@@ -308,15 +317,15 @@ i915_emit_invarient_state(struct intel_c
/* Do this once for initialization. Not really needed if we do
* other indirect state later.
*/
+#if 0
BEGIN_BATCH(2, 0);
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
OUT_BATCH(0);
ADVANCE_BATCH();
-
+#endif
-
emit_indirect( intel,
- LI0_STATE_STATIC_INDIRECT,
+ LI0_STATE_STATIC_INDIRECT,
invarient_state,
sizeof(invarient_state) );
}
@@ -376,6 +385,10 @@ get_state_size(struct i915_hw_state *sta
return sz;
}
+#define OUT(x) do { \
+ _mesa_printf("OUT(0x%08x)\n", x); \
+ *p++ = (x); \
+} while(0)
/* Push the state into the sarea and/or texture memory.
*/
@@ -415,8 +428,9 @@ i915_emit_state(struct intel_context *in
i915_emit_invarient_state(intel);
fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
- /* This state cannot be handled by the hardware binner. There
- * is no need to put it in an indirect buffer.
+
+ /* This needs to go in dynamic indirect state, once that is
+ * working...
*/
BEGIN_BATCH(3, 0);
OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
@@ -444,7 +458,7 @@ i915_emit_state(struct intel_context *in
ADVANCE_BATCH();
#if 0
- /* Scissoring not allowed - what to do about this?
+ /* What happens to scissor?
*/
OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
@@ -465,16 +479,17 @@ i915_emit_state(struct intel_context *in
ADVANCE_BATCH();
#if 0
- emit_indirect(intel, LI0_STATE_DYNAMIC_INDIRECT,
+ emit_indirect(intel,
+ LI0_STATE_DYNAMIC_INDIRECT,
state->Ctx + I915_CTXREG_STATE4,
4 * sizeof(GLuint) );
#else
- BEGIN_BATCH(4, 0);
- OUT_BATCH(state->Ctx[I915_CTXREG_STATE4]);
- OUT_BATCH(state->Ctx[I915_CTXREG_IAB]);
- OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR0]);
- OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR1]);
- ADVANCE_BATCH();
+ BEGIN_BATCH(4, 0);
+ OUT_BATCH(state->Ctx[I915_CTXREG_STATE4]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_IAB]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR0]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR1]);
+ ADVANCE_BATCH();
#endif
}
@@ -483,53 +498,67 @@ i915_emit_state(struct intel_context *in
* avoid lockups on I915 hardware.
*/
if (dirty & I915_UPLOAD_TEX_ALL) {
- assert(0);
-#if 0
- GLuint buf[2 + I915_TEX_UNITS * 3];
- int nr = 0;
+ GLuint offset;
+ GLuint *p;
+ int i, nr = 0;
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i))
nr++;
- fprintf(stderr, "UPLOAD MAPS:\n");
+ offset = emit_indirect(intel,
+ LI0_STATE_MAP,
+ NULL,
+ (2 + nr * 3) * sizeof(GLuint) );
+
+
+ p = (GLuint *)(intel->batch->map + offset);
+
+ OUT(_3DSTATE_MAP_STATE | (3 * nr));
+ OUT((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
- BEGIN_STATIC(2 + nr * 3, LI0_STATE_MAP);
- OUT_STATIC(_3DSTATE_MAP_STATE | (3 * nr));
- OUT_STATIC((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
for (i = 0; i < I915_TEX_UNITS; i++)
- if (dirty & I915_UPLOAD_TEX(i)) {
+ if (dirty & I915_UPLOAD_TEX(i)) {
+ if (state->tex_buffer[i]) {
+ intel_batchbuffer_set_reloc( intel->batch,
+ ((GLubyte *)p) - intel->batch->map,
+ state->tex_buffer[i],
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
+ state->tex_offset[i]);
+ OUT(0); /* placeholder */
+ }
+ else {
+ assert(i == 0);
+ assert(state == &i915->meta);
+ OUT(0);
+ }
+
+ OUT(state->Tex[i][I915_TEXREG_MS3]);
+ OUT(state->Tex[i][I915_TEXREG_MS4]);
+ }
- if (state->tex_buffer[i]) {
- OUT_STATIC_RELOC(state->tex_buffer[i],
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
- state->tex_offset[i]);
- }
- else {
- assert(i == 0);
- assert(state == &i915->meta);
- OUT_STATIC(0);
- }
-
- OUT_STATIC(state->Tex[i][I915_TEXREG_MS3]);
- OUT_STATIC(state->Tex[i][I915_TEXREG_MS4]);
- }
- ADVANCE_STATIC();
fprintf(stderr, "UPLOAD SAMPLERS:\n");
- BEGIN_STATIC(2 + nr * 3, LI0_STATE_SAMPLER);
- OUT_STATIC(_3DSTATE_SAMPLER_STATE | (3 * nr));
- OUT_STATIC((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
- for (i = 0; i < I915_TEX_UNITS; i++)
- if (dirty & I915_UPLOAD_TEX(i)) {
- OUT_STATIC(state->Tex[i][I915_TEXREG_SS2]);
- OUT_STATIC(state->Tex[i][I915_TEXREG_SS3]);
- OUT_STATIC(state->Tex[i][I915_TEXREG_SS4]);
- }
- ADVANCE_STATIC();
-#endif
+ offset = emit_indirect(intel,
+ LI0_STATE_SAMPLER,
+ NULL,
+ (2 + nr * 3) * sizeof(GLuint) );
+
+
+ p = (GLuint *)(intel->batch->map + offset);
+
+
+ OUT(_3DSTATE_SAMPLER_STATE | (3 * nr));
+ OUT((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+ for (i = 0; i < I915_TEX_UNITS; i++) {
+ if (dirty & I915_UPLOAD_TEX(i)) {
+ OUT(state->Tex[i][I915_TEXREG_SS2]);
+ OUT(state->Tex[i][I915_TEXREG_SS3]);
+ OUT(state->Tex[i][I915_TEXREG_SS4]);
+ }
+ }
}
if (dirty & I915_UPLOAD_PROGRAM) {
diff-tree 61c7591f24abe67ee4148d16ca30b9c6d5b6b4c6 (from 0b43da5227a85d2488df78d233b974ccf5f1afc5)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date: Wed Mar 14 15:39:11 2007 +0000
Add intel_batchbuffer_set_reloc - add relocation at arbitrary offset
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
index 5d64344..acac170 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
@@ -87,7 +87,8 @@ intel_dump_batchbuffer(struct intel_batc
{
GLuint *ptr = (GLuint *)map;
GLuint count = batch->segment_finish_offset[0];
- GLuint buf = driBOOffset(batch->buffer);
+ GLuint buf0 = driBOOffset(batch->buffer);
+ GLuint buf = buf0;;
fprintf(stderr, "\n\n\nIMMEDIATE: (%d)\n", count / 4);
dump( buf, ptr, count/4 );
@@ -95,6 +96,7 @@ intel_dump_batchbuffer(struct intel_batc
count = batch->segment_finish_offset[1] - batch->segment_start_offset[1];
ptr = (GLuint *)(map + batch->segment_start_offset[1]);
+ buf = buf0 + batch->segment_start_offset[1];
fprintf(stderr, "\n\n\nDYNAMIC: (%d)\n", count / 4);
dump( buf, ptr, count/4 );
@@ -102,6 +104,7 @@ intel_dump_batchbuffer(struct intel_batc
count = batch->segment_finish_offset[2] - batch->segment_start_offset[2];
ptr = (GLuint *)(map + batch->segment_start_offset[2]);
+ buf = buf0 + batch->segment_start_offset[2];
fprintf(stderr, "\n\n\nOTHER INDIRECT: (%d)\n", count / 4);
dump( buf, ptr, count/4 );
@@ -232,6 +235,8 @@ do_flush_locked(struct intel_batchbuffer
struct buffer_reloc *r = &batch->reloc[i];
ptr[r->offset / 4] = driBOOffset(r->buf) + r->delta;
+ _mesa_printf("reloc offset %x value 0x%x + 0x%x\n",
+ r->offset, driBOOffset(r->buf), r->delta);
}
/* if (INTEL_DEBUG & DEBUG_BATCH) */
@@ -358,12 +363,13 @@ intel_batchbuffer_finish(struct intel_ba
/* This is the only way buffers get added to the validate list.
*/
GLboolean
-intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
- GLuint segment,
- struct _DriBufferObject *buffer,
- GLuint flags, GLuint mask, GLuint delta)
+intel_batchbuffer_set_reloc(struct intel_batchbuffer *batch,
+ GLuint offset,
+ struct _DriBufferObject *buffer,
+ GLuint flags, GLuint mask, GLuint delta)
{
assert(batch->nr_relocs < MAX_RELOCS);
+ assert((offset & 3) == 0);
if (buffer != batch->buffer)
driBOAddListItem(&batch->list, buffer, flags, mask);
@@ -375,10 +381,24 @@ intel_batchbuffer_emit_reloc(struct inte
driBOReference(buffer);
r->buf = buffer;
- r->offset = batch->segment_finish_offset[segment];
+ r->offset = offset;
r->delta = delta;
}
+ return GL_TRUE;
+}
+
+
+GLboolean
+intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+ GLuint segment,
+ struct _DriBufferObject *buffer,
+ GLuint flags, GLuint mask, GLuint delta)
+{
+ intel_batchbuffer_set_reloc( batch,
+ batch->segment_finish_offset[segment],
+ buffer, flags, mask, delta );
+
batch->segment_finish_offset[segment] += 4;
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
index 8b7f988..6bbbf33 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
@@ -84,6 +84,12 @@ void intel_batchbuffer_release_space(str
GLuint segment,
GLuint bytes);
+GLboolean
+intel_batchbuffer_set_reloc(struct intel_batchbuffer *batch,
+ GLuint offset,
+ struct _DriBufferObject *buffer,
+ GLuint flags, GLuint mask, GLuint delta);
+
GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
GLuint segment,
struct _DriBufferObject *buffer,
diff-tree 0b43da5227a85d2488df78d233b974ccf5f1afc5 (from 682fdd9462485562e3534293a12ccebc41d3c3ed)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date: Wed Mar 14 14:49:18 2007 +0000
Experiments with indirect state.
Split the batchbuffer into 3 segments for the three different types
of state (immediate, dynamic-indirect, other-indirect). This is
primarily so that relocations continue to work without too many changes.
Most stuff broken, but trivial/tri.c works. Dynamic indirect state not
working.
diff --git a/src/mesa/drivers/dri/i915tex/i830_vtbl.c b/src/mesa/drivers/dri/i915tex/i830_vtbl.c
index dd0670d..509a369 100644
--- a/src/mesa/drivers/dri/i915tex/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i830_vtbl.c
@@ -426,7 +426,7 @@ i830_emit_state(struct intel_context *in
* scheduling is allowed, rather than assume that it is whenever a
* batchbuffer fills up.
*/
- intel_batchbuffer_require_space(intel->batch, get_state_size(state), 0);
+ intel_batchbuffer_require_space(intel->batch, 0, get_state_size(state), 0);
/* Do this here as we may have flushed the batchbuffer above,
* causing more state to be dirty!
diff --git a/src/mesa/drivers/dri/i915tex/i915_context.h b/src/mesa/drivers/dri/i915tex/i915_context.h
index d2713e8..f594304 100644
--- a/src/mesa/drivers/dri/i915tex/i915_context.h
+++ b/src/mesa/drivers/dri/i915tex/i915_context.h
@@ -66,12 +66,12 @@
#define I915_DESTREG_SR2 11
#define I915_DEST_SETUP_SIZE 12
-#define I915_CTXREG_STATE4 0
-#define I915_CTXREG_LI 1
-#define I915_CTXREG_LIS2 2
-#define I915_CTXREG_LIS4 3
-#define I915_CTXREG_LIS5 4
-#define I915_CTXREG_LIS6 5
+#define I915_CTXREG_LI 0
+#define I915_CTXREG_LIS2 1
+#define I915_CTXREG_LIS4 2
+#define I915_CTXREG_LIS5 3
+#define I915_CTXREG_LIS6 4
+#define I915_CTXREG_STATE4 5
#define I915_CTXREG_IAB 6
#define I915_CTXREG_BLENDCOLOR0 7
#define I915_CTXREG_BLENDCOLOR1 8
diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
index 52db9a9..e2f271a 100644
--- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
@@ -157,71 +157,171 @@ i915_check_vertex_size(struct intel_cont
return sz == expected;
}
+static GLuint *emit_indirect(struct intel_context *intel,
+ GLuint flag,
+ const GLuint *state,
+ GLuint size )
+{
+ GLint i;
+ GLuint delta;
+ GLuint segment;
+ GLuint *ptr;
+
+ switch (flag) {
+ case LI0_STATE_DYNAMIC_INDIRECT:
+ segment = SEGMENT_DYNAMIC_INDIRECT;
+
+ /* Dynamic indirect state is different - tell it the ending
+ * address, it will execute from either the previous end address
+ * or the beginning of the 4k page, depending on what it feels
+ * like.
+ */
+ delta = ((intel->batch->segment_finish_offset[segment] + size - 4) |
+ DIS0_BUFFER_VALID |
+ DIS0_BUFFER_RESET);
+
+ BEGIN_BATCH(2,0);
+ OUT_BATCH( _3DSTATE_LOAD_INDIRECT | flag | (1<<14));
+ OUT_RELOC( intel->batch->buffer,
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
+ DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
+ delta );
+ ADVANCE_BATCH();
+ break;
-static void
-i915_emit_invarient_state(struct intel_context *intel)
-{
- BATCH_LOCALS;
-
- BEGIN_BATCH(200, 0);
-
- OUT_BATCH(_3DSTATE_AA_CMD |
- AA_LINE_ECAAR_WIDTH_ENABLE |
- AA_LINE_ECAAR_WIDTH_1_0 |
- AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+ default:
+ segment = SEGMENT_OTHER_INDIRECT;
- OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
- OUT_BATCH(0);
+ /* Other state is more conventional: tell the hardware the start
+ * point and size.
+ */
+ delta = (intel->batch->segment_finish_offset[segment] |
+ SIS0_FORCE_LOAD | /* XXX: fix me */
+ SIS0_BUFFER_VALID);
+
+ BEGIN_BATCH(3,0);
+ OUT_BATCH( _3DSTATE_LOAD_INDIRECT | flag | (1<<14) | 1);
+ OUT_RELOC( intel->batch->buffer,
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
+ DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
+ delta );
+ OUT_BATCH( (size/4)-1 );
+ ADVANCE_BATCH();
- OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
- OUT_BATCH(0);
+
+ break;
+ }
+
+ /* Now emit the indirect state. XXX: better not flush!
+ */
+ BEGIN_BATCH_SEGMENT( segment, size/4, 0 );
+ for (i = 0; i < size/4; i++)
+ OUT_BATCH_SEGMENT( segment, state[i] );
+ ADVANCE_BATCH_SEGMENT( segment );
+}
- OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
- OUT_BATCH(0);
- /* Don't support texture crossbar yet */
- OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
- CSB_TCB(0, 0) |
- CSB_TCB(1, 1) |
- CSB_TCB(2, 2) |
- CSB_TCB(3, 3) |
- CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
-
- OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
- ENABLE_POINT_RASTER_RULE |
- OGL_POINT_RASTER_RULE |
- ENABLE_LINE_STRIP_PROVOKE_VRTX |
- ENABLE_TRI_FAN_PROVOKE_VRTX |
- LINE_STRIP_PROVOKE_VRTX(1) |
- TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+static void emit(struct intel_context *intel, const GLuint *state, GLuint size )
+{
+ GLint i;
- /* Need to initialize this to zero.
- */
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (1));
- OUT_BATCH(0);
+ BEGIN_BATCH( size/4, 0 );
+ for (i = 0; i < size/4; i++)
+ OUT_BATCH( state[i] );
+ ADVANCE_BATCH( );
+}
- /* XXX: Use this */
- OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+static void
+i915_emit_invarient_state(struct intel_context *intel)
+{
+ static GLuint invarient_state[] = {
- OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
- OUT_BATCH(0);
- OUT_BATCH(0);
+ (_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE |
+ AA_LINE_REGION_WIDTH_1_0),
+
+ /* Could use these to reduce the size of vertices when the incoming
+ * array is constant.
+ */
+ (_3DSTATE_DFLT_DIFFUSE_CMD),
+ (0),
+
+ (_3DSTATE_DFLT_SPEC_CMD),
+ (0),
+
+ (_3DSTATE_DFLT_Z_CMD),
+ (0),
+
+ /* We support texture crossbar via the fragment shader, rather than
+ * with this mechanism.
+ */
+ (_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) |
+ CSB_TCB(6, 6) |
+ CSB_TCB(7, 7)),
+
+ /* Setup OpenGL rasterization state:
+ */
+ (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ ENABLE_TEXKILL_3D_4D |
+ TEXKILL_4D),
+
+ /* Need to initialize this to zero.
+ */
+ (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(3) |
+ (1)),
+ (0),
+
+ (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT),
+ (_3DSTATE_SCISSOR_RECT_0_CMD),
+ (0),
+ (0),
+
+ /* Turn off stipple for now
+ */
+ _3DSTATE_STIPPLE,
+ 0,
+
+ /* For private depth buffers but shared color buffers, eg
+ * front-buffer rendering with a private depthbuffer. We don't do
+ * this.
+ */
+ (_3DSTATE_DEPTH_SUBRECT_DISABLE),
- OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+ (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0)
+ };
- OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
+ /* Do this once for initialization. Not really needed if we do
+ * other indirect state later.
+ */
+ BEGIN_BATCH(2, 0);
+ OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
OUT_BATCH(0);
+ ADVANCE_BATCH();
+
- /* Don't support twosided stencil yet */
- OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
-
- ADVANCE_BATCH();
+ emit_indirect( intel,
+ LI0_STATE_STATIC_INDIRECT,
+ invarient_state,
+ sizeof(invarient_state) );
}
-#define emit(intel, state, size ) \
- intel_batchbuffer_data(intel->batch, state, size, 0 )
static GLuint
get_dirty(struct i915_hw_state *state)
@@ -284,7 +384,6 @@ i915_emit_state(struct intel_context *in
{
struct i915_context *i915 = i915_context(&intel->ctx);
struct i915_hw_state *state = i915->current;
- int i;
GLuint dirty;
BATCH_LOCALS;
@@ -295,7 +394,8 @@ i915_emit_state(struct intel_context *in
* scheduling is allowed, rather than assume that it is whenever a
* batchbuffer fills up.
*/
- intel_batchbuffer_require_space(intel->batch, get_state_size(state), 0);
+ intel_batchbuffer_require_space(intel->batch, 0,
+ get_state_size(state), 0);
/* Do this here as we may have flushed the batchbuffer above,
* causing more state to be dirty!
@@ -305,122 +405,161 @@ i915_emit_state(struct intel_context *in
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);
- if (dirty & I915_UPLOAD_INVARIENT) {
- if (INTEL_DEBUG & DEBUG_STATE)
- fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
+ /* This should not change during a scene for HWZ, correct?
+ *
+ * If it does change, we probably have to flush everything and
+ * restart.
+ */
+ if (dirty & (I915_UPLOAD_INVARIENT | I915_UPLOAD_BUFFERS)) {
+ fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
i915_emit_invarient_state(intel);
- }
- if (dirty & I915_UPLOAD_CTX) {
- if (INTEL_DEBUG & DEBUG_STATE)
- fprintf(stderr, "I915_UPLOAD_CTX:\n");
-
- emit(intel, state->Ctx, sizeof(state->Ctx));
- }
-
- if (dirty & I915_UPLOAD_BUFFERS) {
- if (INTEL_DEBUG & DEBUG_STATE)
- fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
- BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, 0);
+ fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
+ /* This state cannot be handled by the hardware binner. There
+ * is no need to put it in an indirect buffer.
+ */
+ BEGIN_BATCH(3, 0);
OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
OUT_RELOC(state->draw_region->buffer,
DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE,
state->draw_region->draw_offset);
+ ADVANCE_BATCH();
if (state->depth_region) {
+ BEGIN_BATCH(3, 0);
OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
OUT_RELOC(state->depth_region->buffer,
DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE,
state->depth_region->draw_offset);
+ ADVANCE_BATCH();
}
+ BEGIN_BATCH(2, 0);
OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
+ ADVANCE_BATCH();
+
+#if 0
+ /* Scissoring not allowed - what to do about this?
+ */
OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
- ADVANCE_BATCH();
+#endif
}
- if (dirty & I915_UPLOAD_STIPPLE) {
- if (INTEL_DEBUG & DEBUG_STATE)
- fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
- emit(intel, state->Stipple, sizeof(state->Stipple));
- }
+ if (dirty & I915_UPLOAD_CTX) {
+ fprintf(stderr, "I915_UPLOAD_CTX:\n");
- if (dirty & I915_UPLOAD_FOG) {
- if (INTEL_DEBUG & DEBUG_STATE)
- fprintf(stderr, "I915_UPLOAD_FOG:\n");
- emit(intel, state->Fog, sizeof(state->Fog));
+ BEGIN_BATCH(5, 0);
+ OUT_BATCH(state->Ctx[I915_CTXREG_LI]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_LIS2]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_LIS4]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_LIS5]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_LIS6]);
+ ADVANCE_BATCH();
+
+#if 0
+ emit_indirect(intel, LI0_STATE_DYNAMIC_INDIRECT,
+ state->Ctx + I915_CTXREG_STATE4,
+ 4 * sizeof(GLuint) );
+#else
+ BEGIN_BATCH(4, 0);
+ OUT_BATCH(state->Ctx[I915_CTXREG_STATE4]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_IAB]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR0]);
+ OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR1]);
+ ADVANCE_BATCH();
+#endif
}
+
/* Combine all the dirty texture state into a single command to
* avoid lockups on I915 hardware.
*/
if (dirty & I915_UPLOAD_TEX_ALL) {
+ assert(0);
+#if 0
+ GLuint buf[2 + I915_TEX_UNITS * 3];
int nr = 0;
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i))
nr++;
- BEGIN_BATCH(2 + nr * 3, 0);
- OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
- OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+ fprintf(stderr, "UPLOAD MAPS:\n");
+
+ BEGIN_STATIC(2 + nr * 3, LI0_STATE_MAP);
+ OUT_STATIC(_3DSTATE_MAP_STATE | (3 * nr));
+ OUT_STATIC((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i)) {
if (state->tex_buffer[i]) {
- OUT_RELOC(state->tex_buffer[i],
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
- state->tex_offset[i]);
+ OUT_STATIC_RELOC(state->tex_buffer[i],
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
+ state->tex_offset[i]);
}
else {
assert(i == 0);
assert(state == &i915->meta);
- OUT_BATCH(0);
+ OUT_STATIC(0);
}
- OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
- OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
+ OUT_STATIC(state->Tex[i][I915_TEXREG_MS3]);
+ OUT_STATIC(state->Tex[i][I915_TEXREG_MS4]);
}
- ADVANCE_BATCH();
+ ADVANCE_STATIC();
+
- BEGIN_BATCH(2 + nr * 3, 0);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
- OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+ fprintf(stderr, "UPLOAD SAMPLERS:\n");
+ BEGIN_STATIC(2 + nr * 3, LI0_STATE_SAMPLER);
+ OUT_STATIC(_3DSTATE_SAMPLER_STATE | (3 * nr));
+ OUT_STATIC((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i)) {
- OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
- OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
- OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
+ OUT_STATIC(state->Tex[i][I915_TEXREG_SS2]);
+ OUT_STATIC(state->Tex[i][I915_TEXREG_SS3]);
+ OUT_STATIC(state->Tex[i][I915_TEXREG_SS4]);
}
- ADVANCE_BATCH();
- }
-
- if (dirty & I915_UPLOAD_CONSTANTS) {
- if (INTEL_DEBUG & DEBUG_STATE)
- fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
- emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
+ ADVANCE_STATIC();
+#endif
}
if (dirty & I915_UPLOAD_PROGRAM) {
- if (INTEL_DEBUG & DEBUG_STATE)
- fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
+ fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
assert((state->Program[0] & 0x1ff) + 2 == state->ProgramSize);
+#if 1
+ emit_indirect(intel, LI0_STATE_PROGRAM,
+ state->Program, state->ProgramSize * sizeof(GLuint));
+#else
emit(intel, state->Program, state->ProgramSize * sizeof(GLuint));
+#endif
+
if (INTEL_DEBUG & DEBUG_STATE)
i915_disassemble_program(state->Program, state->ProgramSize);
}
+
+ if (dirty & I915_UPLOAD_CONSTANTS) {
+ fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
+#if 1
+ emit_indirect(intel, LI0_STATE_CONSTANTS,
+ state->Constant, state->ConstantSize * sizeof(GLuint));
+#else
+ emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
+#endif
+ }
+
+
state->emitted |= dirty;
}
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
index 309ecf9..5d64344 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
@@ -66,15 +66,45 @@
* server automatically waits on its own dma to complete before
* modifying cliprects ???
*/
-
-static void
-intel_dump_batchbuffer(GLuint offset, GLuint * ptr, GLuint count)
+static void dump(GLuint offset, GLuint *ptr, GLuint count)
{
- int i;
- fprintf(stderr, "\n\n\nSTART BATCH (%d dwords):\n", count / 4);
- for (i = 0; i < count / 4; i += 4)
+ GLuint i;
+
+#if 0
+ for (i = 0; i < count; i += 4)
fprintf(stderr, "0x%x:\t0x%08x 0x%08x 0x%08x 0x%08x\n",
offset + i * 4, ptr[i], ptr[i + 1], ptr[i + 2], ptr[i + 3]);
+#else
+ for (i = 0; i < count; i++)
+ fprintf(stderr, "0x%x:\t0x%08x\n",
+ offset + i * 4, ptr[i]);
+#endif
+}
+
+
+static void
+intel_dump_batchbuffer(struct intel_batchbuffer *batch, GLubyte *map)
+{
+ GLuint *ptr = (GLuint *)map;
+ GLuint count = batch->segment_finish_offset[0];
+ GLuint buf = driBOOffset(batch->buffer);
+
+ fprintf(stderr, "\n\n\nIMMEDIATE: (%d)\n", count / 4);
+ dump( buf, ptr, count/4 );
+ fprintf(stderr, "END BATCH\n\n\n");
+
+ count = batch->segment_finish_offset[1] - batch->segment_start_offset[1];
+ ptr = (GLuint *)(map + batch->segment_start_offset[1]);
+
+ fprintf(stderr, "\n\n\nDYNAMIC: (%d)\n", count / 4);
+ dump( buf, ptr, count/4 );
+ fprintf(stderr, "END BATCH\n\n\n");
+
+ count = batch->segment_finish_offset[2] - batch->segment_start_offset[2];
+ ptr = (GLuint *)(map + batch->segment_start_offset[2]);
+
+ fprintf(stderr, "\n\n\nOTHER INDIRECT: (%d)\n", count / 4);
+ dump( buf, ptr, count/4 );
fprintf(stderr, "END BATCH\n\n\n");
}
@@ -99,7 +129,8 @@ intel_batchbuffer_reset(struct intel_bat
for (i = 0; i < batch->nr_relocs; i++) {
struct buffer_reloc *r = &batch->reloc[i];
- driBOUnReference(r->buf);
+ if (r->buf != batch->buffer)
+ driBOUnReference(r->buf);
}
batch->list_count = 0;
@@ -118,7 +149,10 @@ intel_batchbuffer_reset(struct intel_bat
batch->map = driBOMap(batch->buffer, DRM_BO_FLAG_WRITE, 0);
- batch->ptr = batch->map;
+
+ batch->segment_finish_offset[0] = batch->segment_start_offset[0];
+ batch->segment_finish_offset[1] = batch->segment_start_offset[1];
+ batch->segment_finish_offset[2] = batch->segment_start_offset[2];
}
/*======================================================================
@@ -136,6 +170,19 @@ intel_batchbuffer_alloc(struct intel_con
DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, 0);
batch->last_fence = NULL;
driBOCreateList(20, &batch->list);
+
+ batch->segment_start_offset[0] = 0 * SEGMENT_SZ;
+ batch->segment_start_offset[1] = 1 * SEGMENT_SZ;
+ batch->segment_start_offset[2] = 2 * SEGMENT_SZ;
+
+ batch->segment_finish_offset[0] = 0 * SEGMENT_SZ;
+ batch->segment_finish_offset[1] = 1 * SEGMENT_SZ;
+ batch->segment_finish_offset[2] = 2 * SEGMENT_SZ;
+
+ batch->segment_max_offset[0] = 1 * SEGMENT_SZ - BATCH_RESERVED;
+ batch->segment_max_offset[1] = 2 * SEGMENT_SZ;
+ batch->segment_max_offset[2] = 3 * SEGMENT_SZ;
+
intel_batchbuffer_reset(batch);
return batch;
}
@@ -187,8 +234,8 @@ do_flush_locked(struct intel_batchbuffer
ptr[r->offset / 4] = driBOOffset(r->buf) + r->delta;
}
- if (INTEL_DEBUG & DEBUG_BATCH)
- intel_dump_batchbuffer(0, ptr, used);
+/* if (INTEL_DEBUG & DEBUG_BATCH) */
+ intel_dump_batchbuffer(batch, ptr);
driBOUnmap(batch->buffer);
batch->map = NULL;
@@ -252,8 +299,9 @@ struct _DriFenceObject *
intel_batchbuffer_flush(struct intel_batchbuffer *batch)
{
struct intel_context *intel = batch->intel;
- GLuint used = batch->ptr - batch->map;
+ GLuint used = batch->segment_finish_offset[0] - batch->segment_start_offset[0];
GLboolean was_locked = intel->locked;
+ GLint *ptr = (GLint *)(batch->map + batch->segment_finish_offset[0]);
if (used == 0)
return batch->last_fence;
@@ -265,19 +313,18 @@ intel_batchbuffer_flush(struct intel_bat
* performance drain that we would like to avoid.
*/
if (used & 4) {
- ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
- ((int *) batch->ptr)[1] = 0;
- ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
+ ptr[0] = intel->vtbl.flush_cmd();
+ ptr[1] = 0;
+ ptr[2] = MI_BATCH_BUFFER_END;
used += 12;
}
else {
- ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
- ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
+ ptr[0] = intel->vtbl.flush_cmd();
+ ptr[1] = MI_BATCH_BUFFER_END;
used += 8;
}
driBOUnmap(batch->buffer);
- batch->ptr = NULL;
batch->map = NULL;
/* TODO: Just pass the relocation list and dma buffer up to the
@@ -312,33 +359,38 @@ intel_batchbuffer_finish(struct intel_ba
*/
GLboolean
intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+ GLuint segment,
struct _DriBufferObject *buffer,
GLuint flags, GLuint mask, GLuint delta)
{
assert(batch->nr_relocs < MAX_RELOCS);
- driBOAddListItem(&batch->list, buffer, flags, mask);
+ if (buffer != batch->buffer)
+ driBOAddListItem(&batch->list, buffer, flags, mask);
{
struct buffer_reloc *r = &batch->reloc[batch->nr_relocs++];
- driBOReference(buffer);
+
+ if (buffer != batch->buffer)
+ driBOReference(buffer);
+
r->buf = buffer;
- r->offset = batch->ptr - batch->map;
+ r->offset = batch->segment_finish_offset[segment];
r->delta = delta;
}
- batch->ptr += 4;
+ batch->segment_finish_offset[segment] += 4;
return GL_TRUE;
}
-
void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
+ GLuint segment,
const void *data, GLuint bytes, GLuint flags)
{
assert((bytes & 3) == 0);
- intel_batchbuffer_require_space(batch, bytes, flags);
- __memcpy(batch->ptr, data, bytes);
- batch->ptr += bytes;
+ intel_batchbuffer_require_space(batch, segment, bytes, flags);
+ __memcpy(batch->map + batch->segment_finish_offset[segment], data, bytes);
+ batch->segment_finish_offset[segment] += bytes;
}
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
index 916bcae..8b7f988 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
@@ -9,7 +9,8 @@ struct intel_context;
/* Must be able to hold at minimum VB->Size * 3 * 2 bytes for
* intel_idx_render.c indices, which is currently about 20k.
*/
-#define BATCH_SZ (64*1024)
+#define BATCH_SZ (3*32*1024)
+#define SEGMENT_SZ (32*1024)
#define BATCH_RESERVED 16
#define MAX_RELOCS 400
@@ -24,6 +25,13 @@ struct buffer_reloc
GLuint delta; /* not needed? */
};
+enum {
+ SEGMENT_IMMEDIATE = 0,
+ SEGMENT_DYNAMIC_INDIRECT = 1,
+ SEGMENT_OTHER_INDIRECT = 2,
+ NR_SEGMENTS = 3
+};
+
struct intel_batchbuffer
{
struct bufmgr *bm;
@@ -36,11 +44,18 @@ struct intel_batchbuffer
drmBOList list;
GLuint list_count;
GLubyte *map;
- GLubyte *ptr;
struct buffer_reloc reloc[MAX_RELOCS];
GLuint nr_relocs;
GLuint size;
+
+ /* Put all the different types of packets into one buffer for
+ * easier validation. This will have to change, but for now it is
+ * enough to get started.
+ */
+ GLuint segment_start_offset[NR_SEGMENTS];
+ GLuint segment_finish_offset[NR_SEGMENTS];
+ GLuint segment_max_offset[NR_SEGMENTS];
};
struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
@@ -62,12 +77,15 @@ void intel_batchbuffer_reset(struct inte
* intel_buffer_dword() calls.
*/
void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+ GLuint segment,
const void *data, GLuint bytes, GLuint flags);
void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
+ GLuint segment,
GLuint bytes);
GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+ GLuint segment,
struct _DriBufferObject *buffer,
GLuint flags,
GLuint mask, GLuint offset);
@@ -78,27 +96,35 @@ GLboolean intel_batchbuffer_emit_reloc(s
* work...
*/
static INLINE GLuint
-intel_batchbuffer_space(struct intel_batchbuffer *batch)
+intel_batchbuffer_space(struct intel_batchbuffer *batch,
+ GLuint segment)
{
- return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+ return (batch->segment_max_offset[segment] -
+ batch->segment_finish_offset[segment]);
}
static INLINE void
-intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
+intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch,
+ GLuint segment,
+ GLuint dword)
{
assert(batch->map);
- assert(intel_batchbuffer_space(batch) >= 4);
- *(GLuint *) (batch->ptr) = dword;
- batch->ptr += 4;
+ assert(intel_batchbuffer_space(batch, segment) >= 4);
+ *(GLuint *) (batch->map + batch->segment_finish_offset[segment]) = dword;
+ batch->segment_finish_offset[segment] += 4;
}
static INLINE void
intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
+ GLuint segment,
GLuint sz, GLuint flags)
{
- assert(sz < batch->size - 8);
- if (intel_batchbuffer_space(batch) < sz ||
+ /* XXX: need to figure out flushing, etc.
+ */
+ assert(sz < SEGMENT_SZ);
+
+ if (intel_batchbuffer_space(batch, segment) < sz ||
(batch->flags != 0 && flags != 0 && batch->flags != flags))
intel_batchbuffer_flush(batch);
@@ -109,19 +135,44 @@ intel_batchbuffer_require_space(struct i
*/
#define BATCH_LOCALS
-#define BEGIN_BATCH(n, flags) do { \
+
+/* Hack for indirect emit:
+ */
+#define BEGIN_BATCH_SEGMENT(seg, n, flags) do { \
assert(!intel->prim.flush); \
- intel_batchbuffer_require_space(intel->batch, (n)*4, flags); \
+ intel_batchbuffer_require_space(intel->batch, seg, (n)*4, flags); \
+ _mesa_printf("BEGIN_BATCH(%d,%d,%d) in %s\n", seg, n, flags, __FUNCTION__); \
} while (0)
-#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
+#define OUT_BATCH_SEGMENT(seg, d) do { \
+ _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, d); \
+ intel_batchbuffer_emit_dword(intel->batch, seg, d); \
+} while (0)
+
+#define OUT_BATCH_F_SEGMENT(seg, fl) do { \
+ fi_type fi; \
+ fi.f = fl; \
+ _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, fi.i); \
+ intel_batchbuffer_emit_dword(intel->batch, seg, fi.i); \
+} while (0)
-#define OUT_RELOC(buf,flags,mask,delta) do { \
- assert((delta) >= 0); \
- intel_batchbuffer_emit_reloc(intel->batch, buf, flags, mask, delta); \
+#define OUT_RELOC_SEGMENT(seg, buf,flags,mask,delta) do { \
+ assert((delta) >= 0); \
+ _mesa_printf("OUT_RELOC( seg %d buf %p offset %x )\n", seg, buf, delta); \
+ intel_batchbuffer_emit_reloc(intel->batch, seg, buf, flags, mask, delta); \
} while (0)
-#define ADVANCE_BATCH() do { } while(0)
+#define ADVANCE_BATCH_SEGMENT(seg) do { \
+ _mesa_printf("ADVANCE_BATCH()\n"); \
+} while(0)
+
+
+#define BEGIN_BATCH(n, flags) BEGIN_BATCH_SEGMENT(0, n, flags)
+#define OUT_BATCH(d) OUT_BATCH_SEGMENT(0, d)
+#define OUT_BATCH_F(fl) OUT_BATCH_F_SEGMENT(0, fl)
+#define OUT_RELOC(buf,flags,mask,delta) OUT_RELOC_SEGMENT(0,buf,flags,mask, delta)
+#define ADVANCE_BATCH() ADVANCE_BATCH_SEGMENT(0)
+
#endif
diff --git a/src/mesa/drivers/dri/i915tex/intel_context.c b/src/mesa/drivers/dri/i915tex/intel_context.c
index 6786c5c..208e530 100644
--- a/src/mesa/drivers/dri/i915tex/intel_context.c
+++ b/src/mesa/drivers/dri/i915tex/intel_context.c
@@ -263,7 +263,7 @@ intelFlush(GLcontext * ctx)
INTEL_FIREVERTICES(intel);
- if (intel->batch->map != intel->batch->ptr)
+ if (intel->batch->segment_finish_offset[0] != 0)
intel_batchbuffer_flush(intel->batch);
/* XXX: Need to do an MI_FLUSH here.
diff --git a/src/mesa/drivers/dri/i915tex/intel_tris.c b/src/mesa/drivers/dri/i915tex/intel_tris.c
index 1ba49d8..aaf4d71 100644
--- a/src/mesa/drivers/dri/i915tex/intel_tris.c
+++ b/src/mesa/drivers/dri/i915tex/intel_tris.c
@@ -56,7 +56,7 @@ static void intelRasterPrimitive(GLconte
static void
intel_flush_inline_primitive(struct intel_context *intel)
{
- GLuint used = intel->batch->ptr - intel->prim.start_ptr;
+ GLuint used = intel->batch->segment_finish_offset[0];
assert(intel->prim.primitive != ~0);
@@ -71,7 +71,7 @@ intel_flush_inline_primitive(struct inte
goto finished;
do_discard:
- intel->batch->ptr -= used;
+ intel->batch->segment_finish_offset[0] -= used;
finished:
intel->prim.primitive = ~0;
@@ -95,7 +95,8 @@ intelStartInlinePrimitive(struct intel_c
* be emitted to a batchbuffer missing the required full-state
* preamble.
*/
- if (intel_batchbuffer_space(intel->batch) < 100) {
+ if (intel_batchbuffer_space(intel->batch, 0) < 100) {
+ assert(0); /* XXX: later! */
intel_batchbuffer_flush(intel->batch);
intel->vtbl.emit_state(intel);
}
@@ -108,7 +109,7 @@ intelStartInlinePrimitive(struct intel_c
BEGIN_BATCH(2, batch_flags);
OUT_BATCH(0);
- intel->prim.start_ptr = intel->batch->ptr;
+ intel->prim.start_ptr = intel->batch->map + intel->batch->segment_start_offset[0];
intel->prim.primitive = prim;
intel->prim.flush = intel_flush_inline_primitive;
@@ -138,15 +139,17 @@ intelExtendInlinePrimitive(struct intel_
assert(intel->prim.flush == intel_flush_inline_primitive);
- if (intel_batchbuffer_space(intel->batch) < sz)
+ if (intel_batchbuffer_space(intel->batch, 0) < sz) {
+ assert(0); /* XXX: later */
intelWrapInlinePrimitive(intel);
+ }
/* _mesa_printf("."); */
intel->vtbl.assert_not_dirty(intel);
- ptr = (GLuint *) intel->batch->ptr;
- intel->batch->ptr += sz;
+ ptr = (GLuint *) (intel->batch->map + intel->batch->segment_finish_offset[0]);
+ intel->batch->segment_finish_offset[0] += sz;
return ptr;
}
More information about the mesa-commit
mailing list