[Mesa-dev] [PATCH] r600g: use dirty array to speed up emitting blocks

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Tue Oct 5 09:10:27 PDT 2010


Emitting the dirty blocks sometimes accounted for about 30% of the time spent in r600_dri.so, so I made a separate array with only the dirty blocks.
---
 src/gallium/drivers/r600/r600.h                    |    2 ++
 src/gallium/winsys/r600/drm/evergreen_hw_context.c |   18 +++++++++++++++---
 src/gallium/winsys/r600/drm/r600_hw_context.c      |   20 +++++++++++++++++---
 src/gallium/winsys/r600/drm/r600_priv.h            |    1 +
 4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 59a255d..8acb8c6 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -221,6 +221,8 @@ struct r600_context {
 	struct r600_range	range[256];
 	unsigned		nblocks;
 	struct r600_block	**blocks;
+	unsigned		ndirty;
+	struct r600_block	**dirty;
 	unsigned		pm4_ndwords;
 	unsigned		pm4_cdwords;
 	unsigned		pm4_dirty_cdwords;
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 96769ad..22c1021 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -613,6 +613,13 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 		r = -ENOMEM;
 		goto out_err;
 	}
+
+	/* allocate an array for dirty blocks */
+	ctx->dirty = calloc(ctx->nblocks,sizeof(struct r600_block*));
+	if (ctx->dirty == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
 	return 0;
 out_err:
 	r600_context_fini(ctx);
@@ -654,6 +661,7 @@ static inline void evergreen_context_pipe_state_set_resource(struct r600_context
 		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
 	}
 	if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+		ctx->dirty[ctx->ndirty++] = block;
 		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
@@ -689,6 +697,7 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context
 	block->reg[1] = state->regs[1].value;
 	block->reg[2] = state->regs[2].value;
 	if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+		ctx->dirty[ctx->ndirty++] = block;
 		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
@@ -716,6 +725,7 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c
 	block->reg[3] = state->regs[5].value;
 	block->reg[4] = state->regs[6].value;
 	if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+		ctx->dirty[ctx->ndirty++] = block;
 		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
@@ -800,11 +810,12 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 	}
 
 	/* enough room to copy packet */
-	for (int i = 0; i < ctx->nblocks; i++) {
-		if (ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY) {
-			r600_context_block_emit_dirty(ctx, ctx->blocks[i]);
+	for (int i = 0; i < ctx->ndirty; i++) {
+		if (ctx->dirty[i]->status & R600_BLOCK_STATUS_DIRTY) {
+			r600_context_block_emit_dirty(ctx, ctx->dirty[i]);
 		}
 	}
+	ctx->ndirty = 0;
 
 	/* draw packet */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
@@ -881,6 +892,7 @@ static inline void evergreen_resource_set(struct r600_context *ctx, struct r600_
 		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
 	}
 	if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+		ctx->dirty[ctx->ndirty++] = block;
 		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 84539d1..45c1f0b 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -572,6 +572,7 @@ void r600_context_fini(struct r600_context *ctx)
 	}
 	free(ctx->reloc);
 	free(ctx->pm4);
+	free(ctx->dirty);
 	memset(ctx, 0, sizeof(struct r600_context));
 }
 
@@ -691,6 +692,13 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 		r = -ENOMEM;
 		goto out_err;
 	}
+
+	/* allocate an array for dirty blocks */
+	ctx->dirty = calloc(ctx->nblocks,sizeof(struct r600_block*));
+	if (ctx->dirty == NULL) {
+		r = -ENOMEM;
+		goto out_err;
+	}
 	return 0;
 out_err:
 	r600_context_fini(ctx);
@@ -759,6 +767,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
 			r600_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo);
 		}
 		if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+			ctx->dirty[ctx->ndirty++] = block;
 			block->status |= R600_BLOCK_STATUS_ENABLED;
 			block->status |= R600_BLOCK_STATUS_DIRTY;
 			ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
@@ -800,6 +809,7 @@ static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx
 		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
 	}
 	if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+		ctx->dirty[ctx->ndirty++] = block;
 		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
@@ -835,6 +845,7 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx,
 	block->reg[1] = state->regs[1].value;
 	block->reg[2] = state->regs[2].value;
 	if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+		ctx->dirty[ctx->ndirty++] = block;
 		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
@@ -860,6 +871,7 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex
 	block->reg[2] = state->regs[5].value;
 	block->reg[3] = state->regs[6].value;
 	if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+		ctx->dirty[ctx->ndirty++] = block;
 		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
@@ -959,11 +971,12 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 	}
 
 	/* enough room to copy packet */
-	for (int i = 0; i < ctx->nblocks; i++) {
-		if (ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY) {
-			r600_context_block_emit_dirty(ctx, ctx->blocks[i]);
+	for (int i = 0; i < ctx->ndirty; i++) {
+		if (ctx->dirty[i]->status & R600_BLOCK_STATUS_DIRTY) {
+			r600_context_block_emit_dirty(ctx, ctx->dirty[i]);
 		}
 	}
+	ctx->ndirty = 0;
 
 	/* draw packet */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
@@ -1052,6 +1065,7 @@ void r600_context_flush(struct r600_context *ctx)
 	 */
 	for (int i = 0; i < ctx->nblocks; i++) {
 		if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) {
+			ctx->dirty[ctx->ndirty++] = ctx->blocks[i];
 			ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords;
 			ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY;
 		}
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index f6ceb0a..1d0c122 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -134,6 +134,7 @@ static void inline r600_context_reg(struct r600_context *ctx,
 	block->reg[id] &= ~mask;
 	block->reg[id] |= value;
 	if (!(block->status & R600_BLOCK_STATUS_DIRTY)) {
+		ctx->dirty[ctx->ndirty++] = block;
 		ctx->pm4_dirty_cdwords += block->pm4_ndwords;
 		block->status |= R600_BLOCK_STATUS_ENABLED;
 		block->status |= R600_BLOCK_STATUS_DIRTY;
-- 
1.7.1



More information about the mesa-dev mailing list