Mesa (master): r600g: precompute some of the hw state

Jerome Glisse glisse at kemper.freedesktop.org
Mon Aug 30 19:02:24 UTC 2010


Module: Mesa
Branch: master
Commit: de0b76cab22caa9fc7260f80acb8f151ccced6c5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=de0b76cab22caa9fc7260f80acb8f151ccced6c5

Author: Jerome Glisse <jglisse at redhat.com>
Date:   Sun Aug 29 21:01:51 2010 -0400

r600g: precompute some of the hw state

The idea is to build the hw state at pipe state creation and
reuse it, while keeping a non-PM4 packet interface between
the winsys and the pipe driver. This commit also forces a rebuild
of the pm4 packet on each call to radeon_state_pm4, which
in turn slows everything down; this will be addressed.

Signed-off-by: Jerome Glisse <jglisse at redhat.com>

---

 src/gallium/drivers/r600/r600_blit.c       |    4 +-
 src/gallium/drivers/r600/r600_context.h    |    2 +-
 src/gallium/drivers/r600/r600_resource.h   |    2 +-
 src/gallium/drivers/r600/r600_screen.h     |    2 +-
 src/gallium/drivers/r600/r600_state.c      |  226 +++++++++-------------------
 src/gallium/drivers/r600/r600_texture.c    |   43 ++++--
 src/gallium/drivers/r600/radeon.h          |   47 ++++--
 src/gallium/winsys/r600/drm/r600_state.c   |    9 +-
 src/gallium/winsys/r600/drm/r600_states.h  |   28 +++-
 src/gallium/winsys/r600/drm/radeon_priv.h  |   20 ++--
 src/gallium/winsys/r600/drm/radeon_state.c |   62 +++++++--
 11 files changed, 229 insertions(+), 216 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index be1fcf9..dbcd6cd 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -670,7 +670,7 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
 	if (r) {
 		return r;
 	}
-	r = r600_texture_cb0(ctx, rtexture, level);
+	r = r600_texture_cb(ctx, rtexture, 0, level);
 	if (r) {
 		return r;
 	}
@@ -772,7 +772,7 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
 	if (r) {
 		goto out;
 	}
-	r = radeon_draw_set(draw, rtexture->cb0[level]);
+	r = radeon_draw_set(draw, rtexture->cb[0][level]);
 	if (r) {
 		goto out;
 	}
diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h
index d96d5b5..e9495f0 100644
--- a/src/gallium/drivers/r600/r600_context.h
+++ b/src/gallium/drivers/r600/r600_context.h
@@ -121,7 +121,7 @@ struct r600_context_hw_states {
 	struct radeon_state	*config;
 	struct radeon_state	*cb_cntl;
 	struct radeon_state	*db;
-	struct radeon_state	*ucp[6];
+	struct radeon_state	*ucp;
 	unsigned		ps_nresource;
 	unsigned		ps_nsampler;
 	struct radeon_state	*ps_resource[160];
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index b880f93..8078a83 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -57,7 +57,7 @@ struct r600_resource_texture {
 	unsigned			dirty;
 	struct radeon_bo		*uncompressed;
 	struct radeon_state		*scissor[PIPE_MAX_TEXTURE_LEVELS];
-	struct radeon_state		*cb0[PIPE_MAX_TEXTURE_LEVELS];
+	struct radeon_state		*cb[8][PIPE_MAX_TEXTURE_LEVELS];
 	struct radeon_state		*db[PIPE_MAX_TEXTURE_LEVELS];
 	struct radeon_state		*viewport[PIPE_MAX_TEXTURE_LEVELS];
 };
diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h
index 438976f..b9938f1 100644
--- a/src/gallium/drivers/r600/r600_screen.h
+++ b/src/gallium/drivers/r600/r600_screen.h
@@ -84,7 +84,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
 void r600_texture_transfer_unmap(struct pipe_context *ctx,
 				 struct pipe_transfer* transfer);
 int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
-int r600_texture_cb0(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level);
 int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
 int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
 int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index b5db848..6049e13 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -34,6 +34,16 @@
 #include "r600d.h"
 #include "r600_state_inlines.h"
 
+static struct radeon_state *r600_blend(struct r600_context *rctx, const struct pipe_blend_state *state);
+static struct radeon_state *r600_viewport(struct r600_context *rctx, const struct pipe_viewport_state *state);
+static struct radeon_state *r600_ucp(struct r600_context *rctx, const struct pipe_clip_state *state);
+static struct radeon_state *r600_sampler(struct r600_context *rctx,
+				const struct pipe_sampler_state *state,
+				unsigned id);
+static struct radeon_state *r600_resource(struct pipe_context *ctx,
+					const struct pipe_sampler_view *view,
+					unsigned id);
+
 static void *r600_create_blend_state(struct pipe_context *ctx,
 					const struct pipe_blend_state *state)
 {
@@ -86,6 +96,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 	rstate->state.sampler_view.texture = texture;
 	rstate->state.sampler_view.reference.count = 1;
 	rstate->state.sampler_view.context = ctx;
+	rstate->rstate = r600_resource(ctx, &rstate->state.sampler_view, 0);
 	return &rstate->state.sampler_view;
 }
 
@@ -229,6 +240,9 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx,
 	for (i = 0; i < count; i++) {
 		rstate = (struct r600_context_state *)states[i];
 		rctx->ps_sampler[i] = r600_context_state_incref(rstate);
+		if (rstate) {
+			radeon_state_convert(rstate->rstate, R600_STATE_SAMPLER, i, R600_SHADER_PS);
+		}
 	}
 	rctx->ps_nsampler = count;
 }
@@ -246,6 +260,9 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx,
 	for (i = 0; i < count; i++) {
 		rstate = (struct r600_context_state *)states[i];
 		rctx->vs_sampler[i] = r600_context_state_incref(rstate);
+		if (rstate) {
+			radeon_state_convert(rstate->rstate, R600_STATE_SAMPLER, i, R600_SHADER_VS);
+		}
 	}
 	rctx->vs_nsampler = count;
 }
@@ -337,6 +354,9 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx,
 	for (i = 0; i < count; i++) {
 		rstate = (struct r600_context_state *)views[i];
 		rctx->ps_sampler_view[i] = r600_context_state_incref(rstate);
+		if (rstate) {
+			radeon_state_convert(rstate->rstate, R600_STATE_RESOURCE, i, R600_SHADER_PS);
+		}
 	}
 	rctx->ps_nsampler_view = count;
 }
@@ -355,6 +375,9 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx,
 	for (i = 0; i < count; i++) {
 		rstate = (struct r600_context_state *)views[i];
 		rctx->vs_sampler_view[i] = r600_context_state_incref(rstate);
+		if (rstate) {
+			radeon_state_convert(rstate->rstate, R600_STATE_RESOURCE, i, R600_SHADER_VS);
+		}
 	}
 	rctx->vs_nsampler_view = count;
 }
@@ -363,10 +386,19 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 					const struct pipe_framebuffer_state *state)
 {
 	struct r600_context *rctx = r600_context(ctx);
+	struct r600_resource_texture *rtexture;
 	struct r600_context_state *rstate;
 
 	rstate = r600_context_state(rctx, pipe_framebuffer_type, state);
 	r600_bind_state(ctx, rstate);
+	for (int i = 0; i < state->nr_cbufs; i++) {
+		rtexture = (struct r600_resource_texture*)state->cbufs[i]->texture;
+		r600_texture_cb(ctx, rtexture, i, state->cbufs[i]->level);
+	}
+	if (state->zsbuf) {
+		rtexture = (struct r600_resource_texture*)state->zsbuf->texture;
+		r600_texture_db(ctx, rtexture, state->zsbuf->level);
+	}
 }
 
 static void r600_set_polygon_stipple(struct pipe_context *ctx,
@@ -565,6 +597,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
 		break;
 	case pipe_viewport_type:
 		rstate->state.viewport = (*states).viewport;
+		rstate->rstate = r600_viewport(rctx, &rstate->state.viewport);
 		break;
 	case pipe_depth_type:
 		rstate->state.depth = (*states).depth;
@@ -580,6 +613,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
 		break;
 	case pipe_clip_type:
 		rstate->state.clip = (*states).clip;
+		rstate->rstate = r600_ucp(rctx, &rstate->state.clip);
 		break;
 	case pipe_stencil_type:
 		rstate->state.stencil = (*states).stencil;
@@ -592,6 +626,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
 		break;
 	case pipe_blend_type:
 		rstate->state.blend = (*states).blend;
+		rstate->rstate = r600_blend(rctx, &rstate->state.blend);
 		break;
 	case pipe_stencil_ref_type:
 		rstate->state.stencil_ref = (*states).stencil_ref;
@@ -606,6 +641,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
 		break;
 	case pipe_sampler_type:
 		rstate->state.sampler = (*states).sampler;
+		rstate->rstate = r600_sampler(rctx, &rstate->state.sampler, 0);
 		break;
 	default:
 		R600_ERR("invalid type %d\n", rstate->type);
@@ -615,11 +651,10 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
 	return rstate;
 }
 
-static struct radeon_state *r600_blend(struct r600_context *rctx)
+static struct radeon_state *r600_blend(struct r600_context *rctx, const struct pipe_blend_state *state)
 {
 	struct r600_screen *rscreen = rctx->screen;
 	struct radeon_state *rstate;
-	const struct pipe_blend_state *state = &rctx->blend->state.blend;
 	int i;
 
 	rstate = radeon_state(rscreen->rw, R600_STATE_BLEND, 0);
@@ -675,129 +710,28 @@ static struct radeon_state *r600_blend(struct r600_context *rctx)
 	return rstate;
 }
 
-static struct radeon_state *r600_ucp(struct r600_context *rctx, int clip)
+static struct radeon_state *r600_ucp(struct r600_context *rctx, const struct pipe_clip_state *state)
 {
 	struct r600_screen *rscreen = rctx->screen;
 	struct radeon_state *rstate;
-	const struct pipe_clip_state *state = &rctx->clip->state.clip;
 
-	rstate = radeon_state(rscreen->rw, R600_STATE_CLIP, clip);
+	rstate = radeon_state(rscreen->rw, R600_STATE_UCP, 0);
 	if (rstate == NULL)
 		return NULL;
 
-	rstate->states[R600_CLIP__PA_CL_UCP_X_0] = fui(state->ucp[clip][0]);
-	rstate->states[R600_CLIP__PA_CL_UCP_Y_0] = fui(state->ucp[clip][1]);
-	rstate->states[R600_CLIP__PA_CL_UCP_Z_0] = fui(state->ucp[clip][2]);
-	rstate->states[R600_CLIP__PA_CL_UCP_W_0] = fui(state->ucp[clip][3]);
-
-	if (radeon_state_pm4(rstate)) {
-		radeon_state_decref(rstate);
-		return NULL;
+	for (int i = 0; i < state->nr; i++) {
+		rstate->states[i * 4 + 0] = fui(state->ucp[i][0]);
+		rstate->states[i * 4 + 1] = fui(state->ucp[i][1]);
+		rstate->states[i * 4 + 2] = fui(state->ucp[i][2]);
+		rstate->states[i * 4 + 3] = fui(state->ucp[i][3]);
 	}
-	return rstate;
 
-}
-
-static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
-{
-	struct r600_screen *rscreen = rctx->screen;
-	struct r600_resource_texture *rtex;
-	struct r600_resource *rbuffer;
-	struct radeon_state *rstate;
-	const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
-	unsigned level = state->cbufs[cb]->level;
-	unsigned pitch, slice;
-	unsigned color_info;
-	unsigned format, swap, ntype;
-	const struct util_format_description *desc;
-
-	rstate = radeon_state(rscreen->rw, R600_STATE_CB0 + cb, 0);
-	if (rstate == NULL)
-		return NULL;
-	rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
-	rbuffer = &rtex->resource;
-	rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
-	rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
-	rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
-	rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
-	rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
-	rstate->placement[4] = RADEON_GEM_DOMAIN_GTT;
-	rstate->nbo = 3;
-	pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
-	slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1;
-
-	ntype = 0;
-	desc = util_format_description(rtex->resource.base.b.format);
-	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
-		ntype = V_0280A0_NUMBER_SRGB;
-
-	format = r600_translate_colorformat(rtex->resource.base.b.format);
-	swap = r600_translate_colorswap(rtex->resource.base.b.format);
-
-	color_info = S_0280A0_FORMAT(format) |
-		S_0280A0_COMP_SWAP(swap) |
-		S_0280A0_BLEND_CLAMP(1) |
-		S_0280A0_SOURCE_FORMAT(1) |
-		S_0280A0_NUMBER_TYPE(ntype);
-
-	rstate->states[R600_CB0__CB_COLOR0_BASE] = rtex->offset[level] >> 8;
-	rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info;
-	rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) |
-						S_028060_SLICE_TILE_MAX(slice);
-	rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000;
-	rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000;
-	rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000;
-	rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000;
 	if (radeon_state_pm4(rstate)) {
 		radeon_state_decref(rstate);
 		return NULL;
 	}
 	return rstate;
-}
 
-static struct radeon_state *r600_db(struct r600_context *rctx)
-{
-	struct r600_screen *rscreen = rctx->screen;
-	struct r600_resource_texture *rtex;
-	struct r600_resource *rbuffer;
-	struct radeon_state *rstate;
-	const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
-	unsigned level;
-	unsigned pitch, slice, format;
-
-	if (state->zsbuf == NULL)
-		return NULL;
-
-	rstate = radeon_state(rscreen->rw, R600_STATE_DB, 0);
-	if (rstate == NULL)
-		return NULL;
-
-	rtex = (struct r600_resource_texture*)state->zsbuf->texture;
-	rtex->tilled = 1;
-	rtex->array_mode = 2;
-	rtex->tile_type = 1;
-	rtex->depth = 1;
-	rbuffer = &rtex->resource;
-
-	rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
-	rstate->nbo = 1;
-	rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
-	level = state->zsbuf->level;
-	pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
-	slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
-	format = r600_translate_dbformat(state->zsbuf->texture->format);
-	rstate->states[R600_DB__DB_DEPTH_BASE] = rtex->offset[level] >> 8;
-	rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) |
-					S_028010_FORMAT(format);
-	rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000;
-	rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1;
-	rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) |
-						S_028000_SLICE_TILE_MAX(slice);
-	if (radeon_state_pm4(rstate)) {
-		radeon_state_decref(rstate);
-		return NULL;
-	}
-	return rstate;
 }
 
 static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
@@ -954,9 +888,8 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx)
 	return rstate;
 }
 
-static struct radeon_state *r600_viewport(struct r600_context *rctx)
+static struct radeon_state *r600_viewport(struct r600_context *rctx, const struct pipe_viewport_state *state)
 {
-	const struct pipe_viewport_state *state = &rctx->viewport->state.viewport;
 	struct r600_screen *rscreen = rctx->screen;
 	struct radeon_state *rstate;
 
@@ -1366,6 +1299,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
 int r600_context_hw_states(struct pipe_context *ctx)
 {
 	struct r600_context *rctx = r600_context(ctx);
+	struct r600_resource_texture *rtexture;
 	unsigned i;
 	int r;
 	int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
@@ -1377,69 +1311,59 @@ int r600_context_hw_states(struct pipe_context *ctx)
 	/* free previous TODO determine what need to be updated, what
 	 * doesn't
 	 */
-	//radeon_state_decref(rctx->hw_states.config);
 	rctx->hw_states.cb_cntl = radeon_state_decref(rctx->hw_states.cb_cntl);
-	rctx->hw_states.db = radeon_state_decref(rctx->hw_states.db);
 	rctx->hw_states.rasterizer = radeon_state_decref(rctx->hw_states.rasterizer);
 	rctx->hw_states.scissor = radeon_state_decref(rctx->hw_states.scissor);
 	rctx->hw_states.dsa = radeon_state_decref(rctx->hw_states.dsa);
-	rctx->hw_states.blend = radeon_state_decref(rctx->hw_states.blend);
-	rctx->hw_states.viewport = radeon_state_decref(rctx->hw_states.viewport);
-	for (i = 0; i < 8; i++) {
-		rctx->hw_states.cb[i] = radeon_state_decref(rctx->hw_states.cb[i]);
-	}
-	for (i = 0; i < 6; i++) {
-		rctx->hw_states.ucp[i] = radeon_state_decref(rctx->hw_states.ucp[i]);
-	}
-	for (i = 0; i < rctx->hw_states.ps_nresource; i++) {
-		radeon_state_decref(rctx->hw_states.ps_resource[i]);
-		rctx->hw_states.ps_resource[i] = NULL;
-	}
-	rctx->hw_states.ps_nresource = 0;
-	for (i = 0; i < rctx->hw_states.ps_nsampler; i++) {
-		radeon_state_decref(rctx->hw_states.ps_sampler[i]);
-		rctx->hw_states.ps_sampler[i] = NULL;
-	}
-	rctx->hw_states.ps_nsampler = 0;
 
 	/* build new states */
+	rctx->hw_states.blend = NULL;
+	rctx->hw_states.viewport = NULL;
+	rctx->hw_states.ucp = NULL;
 	rctx->hw_states.rasterizer = r600_rasterizer(rctx);
 	rctx->hw_states.scissor = r600_scissor(rctx);
 	rctx->hw_states.dsa = r600_dsa(rctx);
-	rctx->hw_states.blend = r600_blend(rctx);
-	rctx->hw_states.viewport = r600_viewport(rctx);
-	for (i = 0; i < nr_cbufs; i++) {
-		rctx->hw_states.cb[i] = r600_cb(rctx, i);
+	rctx->hw_states.cb_cntl = r600_cb_cntl(rctx);
+	if (rctx->viewport) {
+		rctx->hw_states.viewport = rctx->viewport->rstate;
 	}
-	for (i = 0; i < ucp_nclip; i++) {
-		rctx->hw_states.ucp[i] = r600_ucp(rctx, i);
+	if (rctx->blend) {
+		rctx->hw_states.blend = rctx->blend->rstate;
 	}
-	rctx->hw_states.db = r600_db(rctx);
-	rctx->hw_states.cb_cntl = r600_cb_cntl(rctx);
+	if (rctx->clip) {
+		rctx->hw_states.ucp = rctx->clip->rstate;
+	}
+	for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) {
+		rtexture = (struct r600_resource_texture*)rctx->framebuffer->state.framebuffer.cbufs[i]->texture;
+		rctx->hw_states.cb[i] = rtexture->cb[i][rctx->framebuffer->state.framebuffer.cbufs[i]->level];
+	}
+	if (rctx->framebuffer->state.framebuffer.zsbuf) {
+		rtexture = (struct r600_resource_texture*)rctx->framebuffer->state.framebuffer.zsbuf->texture;
+		rctx->hw_states.db = rtexture->db[rctx->framebuffer->state.framebuffer.zsbuf->level];
+	}
+
 
 	for (i = 0; i < rctx->ps_nsampler; i++) {
 		if (rctx->ps_sampler[i]) {
-			rctx->hw_states.ps_sampler[i] = r600_sampler(rctx,
-							&rctx->ps_sampler[i]->state.sampler,
-							i);
+			rctx->hw_states.ps_sampler[i] = rctx->ps_sampler[i]->rstate;
+		} else {
+			rctx->hw_states.ps_sampler[i] = NULL;
 		}
 	}
 	rctx->hw_states.ps_nsampler = rctx->ps_nsampler;
 	for (i = 0; i < rctx->ps_nsampler_view; i++) {
 		if (rctx->ps_sampler_view[i]) {
-			rctx->hw_states.ps_resource[i] = r600_resource(ctx,
-							&rctx->ps_sampler_view[i]->state.sampler_view,
-							i);
+			rctx->hw_states.ps_resource[i] = rctx->ps_sampler_view[i]->rstate;
+		} else {
+			rctx->hw_states.ps_resource[i] = NULL;
 		}
 	}
 	rctx->hw_states.ps_nresource = rctx->ps_nsampler_view;
 
 	/* bind states */
-	for (i = 0; i < ucp_nclip; i++) {
-		r = radeon_draw_set(rctx->draw, rctx->hw_states.ucp[i]);
-		if (r)
-			return r;
-	}
+	r = radeon_draw_set(rctx->draw, rctx->hw_states.ucp);
+	if (r)
+		return r;
 	r = radeon_draw_set(rctx->draw, rctx->hw_states.db);
 	if (r)
 		return r;
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 77d627c..ec1d505 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -128,13 +128,25 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 	return &resource->base.b;
 }
 
+static void r600_texture_destroy_state(struct pipe_resource *ptexture)
+{
+	struct r600_resource_texture *rtexture = (struct r600_resource_texture*)ptexture;
+
+	for (int i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) {
+		radeon_state_decref(rtexture->scissor[i]);
+		radeon_state_decref(rtexture->db[i]);
+		for (int j = 0; j < 8; j++) {
+			radeon_state_decref(rtexture->cb[j][i]);
+		}
+	}
+}
+
 static void r600_texture_destroy(struct pipe_screen *screen,
 				 struct pipe_resource *ptex)
 {
 	struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
 	struct r600_resource *resource = &rtex->resource;
 	struct r600_screen *rscreen = r600_screen(screen);
-	unsigned i;
 
 	if (resource->bo) {
 		radeon_bo_decref(rscreen->rw, resource->bo);
@@ -142,11 +154,7 @@ static void r600_texture_destroy(struct pipe_screen *screen,
 	if (rtex->uncompressed) {
 		radeon_bo_decref(rscreen->rw, rtex->uncompressed);
 	}
-	for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) {
-		radeon_state_decref(rtex->scissor[i]);
-		radeon_state_decref(rtex->cb0[i]);
-		radeon_state_decref(rtex->db[i]);
-	}
+	r600_texture_destroy_state(ptex);
 	FREE(rtex);
 }
 
@@ -211,9 +219,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
 	pipe_reference_init(&resource->base.b.reference, 1);
 	resource->base.b.screen = screen;
 	resource->bo = bo;
+	rtex->depth = 0;
 	rtex->pitch_override = whandle->stride;
 	rtex->bpt = util_format_get_blocksize(templ->format);
 	rtex->pitch[0] = whandle->stride;
+	rtex->width[0] = templ->width0;
+	rtex->height[0] = templ->height0;
 	rtex->offset[0] = 0;
 	rtex->size = align(rtex->pitch[0] * templ->height0, 64);
 
@@ -696,9 +707,9 @@ static struct radeon_state *r600_texture_state_scissor(struct r600_screen *rscre
 	return rstate;
 }
 
-static struct radeon_state *r600_texture_state_cb0(struct r600_screen *rscreen,
+static struct radeon_state *r600_texture_state_cb(struct r600_screen *rscreen,
 				struct r600_resource_texture *rtexture,
-				unsigned level)
+				unsigned cb, unsigned level)
 {
 	struct radeon_state *rstate;
 	struct r600_resource *rbuffer;
@@ -707,7 +718,7 @@ static struct radeon_state *r600_texture_state_cb0(struct r600_screen *rscreen,
 	unsigned format, swap, ntype;
 	const struct util_format_description *desc;
 
-	rstate = radeon_state(rscreen->rw, R600_STATE_CB0, 0);
+	rstate = radeon_state(rscreen->rw, R600_STATE_CB0 + cb, 0);
 	if (rstate == NULL)
 		return NULL;
 	rbuffer = &rtexture->resource;
@@ -770,6 +781,10 @@ static struct radeon_state *r600_texture_state_db(struct r600_screen *rscreen,
 	if (rstate == NULL)
 		return NULL;
 	rbuffer = &rtexture->resource;
+	rtexture->tilled = 1;
+	rtexture->array_mode = 2;
+	rtexture->tile_type = 1;
+	rtexture->depth = 1;
 
 	/* set states (most default value are 0 and struct already
 	 * initialized to 0, thus avoid resetting them)
@@ -838,14 +853,14 @@ static struct radeon_state *r600_texture_state_viewport(struct r600_screen *rscr
 	return rstate;
 }
 
-int r600_texture_cb0(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level)
 {
 	struct r600_screen *rscreen = r600_screen(ctx->screen);
 
-	if (rtexture->cb0[level] == NULL) {
-		rtexture->cb0[level] = r600_texture_state_cb0(rscreen, rtexture, level);
-		if (rtexture->cb0[level] == NULL) {
-			R600_ERR("failed to create cb0 state for texture\n");
+	if (rtexture->cb[cb][level] == NULL) {
+		rtexture->cb[cb][level] = r600_texture_state_cb(rscreen, rtexture, cb, level);
+		if (rtexture->cb[cb][level] == NULL) {
+			R600_ERR("failed to create cb%d state for texture\n", cb);
 			return -ENOMEM;
 		}
 	}
diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h
index 046c264..3f1ca95 100644
--- a/src/gallium/drivers/r600/radeon.h
+++ b/src/gallium/drivers/r600/radeon.h
@@ -109,13 +109,11 @@ struct radeon_state {
 	unsigned			id;
 	unsigned                        shader_index;
 	unsigned			nstates;
-	u32				*states;
+	u32				states[64];
 	unsigned			npm4;
 	unsigned			cpm4;
 	u32				pm4_crc;
-	u32				*pm4;
-	u32				nimmd;
-	u32				*immd;
+	u32				pm4[128];
 	unsigned			nbo;
 	struct radeon_bo		*bo[4];
 	unsigned			nreloc;
@@ -130,6 +128,7 @@ struct radeon_state *radeon_state_shader(struct radeon *radeon, u32 type, u32 id
 struct radeon_state *radeon_state_incref(struct radeon_state *state);
 struct radeon_state *radeon_state_decref(struct radeon_state *state);
 int radeon_state_pm4(struct radeon_state *state);
+int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type);
 
 /*
  * draw functions
@@ -219,7 +218,7 @@ enum r600_stype {
 	R600_STATE_DB,
 	R600_STATE_QUERY_BEGIN,
 	R600_STATE_QUERY_END,
-	R600_STATE_CLIP,
+	R600_STATE_UCP,
 	R600_STATE_VGT,
 	R600_STATE_DRAW,
 };
@@ -613,17 +612,37 @@ enum {
 /* R600_DRAW */
 #define R600_DRAW__VGT_NUM_INDICES			0
 #define R600_DRAW__VGT_DMA_BASE_HI			1
-#define R600_DRAW__VGT_DMA_BASE			2
+#define R600_DRAW__VGT_DMA_BASE				2
 #define R600_DRAW__VGT_DRAW_INITIATOR			3
-#define R600_DRAW_SIZE				4
-#define R600_DRAW_PM4				128
+#define R600_DRAW_SIZE					4
+#define R600_DRAW_PM4					128
 /* R600_CLIP */
-#define R600_CLIP__PA_CL_UCP_X_0  0
-#define R600_CLIP__PA_CL_UCP_Y_0  1
-#define R600_CLIP__PA_CL_UCP_Z_0  2
-#define R600_CLIP__PA_CL_UCP_W_0  3
-#define R600_CLIP_SIZE  4
-#define R600_CLIP_PM4 128
+#define R600_CLIP__PA_CL_UCP_X_0			0
+#define R600_CLIP__PA_CL_UCP_Y_0			1
+#define R600_CLIP__PA_CL_UCP_Z_0			2
+#define R600_CLIP__PA_CL_UCP_W_0			3
+#define R600_CLIP__PA_CL_UCP_X_1			4
+#define R600_CLIP__PA_CL_UCP_Y_1			5
+#define R600_CLIP__PA_CL_UCP_Z_1			6
+#define R600_CLIP__PA_CL_UCP_W_1			7
+#define R600_CLIP__PA_CL_UCP_X_2			8
+#define R600_CLIP__PA_CL_UCP_Y_2			9
+#define R600_CLIP__PA_CL_UCP_Z_2			10
+#define R600_CLIP__PA_CL_UCP_W_2			11
+#define R600_CLIP__PA_CL_UCP_X_3			12
+#define R600_CLIP__PA_CL_UCP_Y_3			13
+#define R600_CLIP__PA_CL_UCP_Z_3			14
+#define R600_CLIP__PA_CL_UCP_W_3			15
+#define R600_CLIP__PA_CL_UCP_X_4			16
+#define R600_CLIP__PA_CL_UCP_Y_4			17
+#define R600_CLIP__PA_CL_UCP_Z_4			18
+#define R600_CLIP__PA_CL_UCP_W_4			19
+#define R600_CLIP__PA_CL_UCP_X_5			20
+#define R600_CLIP__PA_CL_UCP_Y_5			21
+#define R600_CLIP__PA_CL_UCP_Z_5			22
+#define R600_CLIP__PA_CL_UCP_W_5			23
+#define R600_CLIP_SIZE					24
+#define R600_CLIP_PM4					128
 /* R600 QUERY BEGIN/END */
 #define R600_QUERY__OFFSET			0
 #define R600_QUERY_SIZE				1
diff --git a/src/gallium/winsys/r600/drm/r600_state.c b/src/gallium/winsys/r600/drm/r600_state.c
index e3d0116..f6a428e 100644
--- a/src/gallium/winsys/r600/drm/r600_state.c
+++ b/src/gallium/winsys/r600/drm/r600_state.c
@@ -80,7 +80,7 @@ struct radeon_stype_info r600_stypes[] = {
 	{ R600_STATE_QUERY_BEGIN, 1, 0, r600_state_pm4_query_begin, SUB_NONE(VGT_EVENT) },
 	{ R600_STATE_QUERY_END, 1, 0, r600_state_pm4_query_end, SUB_NONE(VGT_EVENT) },
 	{ R600_STATE_DB, 1, 0, r600_state_pm4_db, SUB_NONE(DB) },
-	{ R600_STATE_CLIP, 6, 0, r600_state_pm4_generic, SUB_NONE(UCP) },
+	{ R600_STATE_UCP, 1, 0, r600_state_pm4_generic, SUB_NONE(UCP) },
 	{ R600_STATE_VGT, 1, 0, r600_state_pm4_vgt, SUB_NONE(VGT) },
 	{ R600_STATE_DRAW, 1, 0, r600_state_pm4_draw, SUB_NONE(DRAW) },
 };
@@ -381,13 +381,6 @@ static int r600_state_pm4_draw(struct radeon_state *state)
 		if (r)
 			return r;
 		state->pm4[state->cpm4++] = state->bo[0]->handle;
-	} else if  (state->nimmd) {
-		state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_IMMD, state->nimmd + 1);
-		state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES];
-		state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR];
-		for (i = 0; i < state->nimmd; i++) {
-			state->pm4[state->cpm4++] = state->immd[i];
-		}
 	} else {
 		state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
 		state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES];
diff --git a/src/gallium/winsys/r600/drm/r600_states.h b/src/gallium/winsys/r600/drm/r600_states.h
index 51b69b9..09d79d4 100644
--- a/src/gallium/winsys/r600/drm/r600_states.h
+++ b/src/gallium/winsys/r600/drm/r600_states.h
@@ -284,10 +284,30 @@ static const struct radeon_register R600_names_VS_CONSTANT[] = {
 };
 
 static const struct radeon_register R600_names_UCP[] = {
-	{0x00028e20, 0, 0, "PA_CL_UCP0_X"},
-	{0x00028e24, 0, 0, "PA_CL_UCP0_Y"},
-	{0x00028e28, 0, 0, "PA_CL_UCP0_Z"},
-	{0x00028e2c, 0, 0, "PA_CL_UCP0_W"},
+	{0x00028E20, 0, 0, "PA_CL_UCP0_X"},
+	{0x00028E24, 0, 0, "PA_CL_UCP0_Y"},
+	{0x00028E28, 0, 0, "PA_CL_UCP0_Z"},
+	{0x00028E2C, 0, 0, "PA_CL_UCP0_W"},
+	{0x00028E30, 0, 0, "PA_CL_UCP1_X"},
+	{0x00028E34, 0, 0, "PA_CL_UCP1_Y"},
+	{0x00028E38, 0, 0, "PA_CL_UCP1_Z"},
+	{0x00028E3C, 0, 0, "PA_CL_UCP1_W"},
+	{0x00028E40, 0, 0, "PA_CL_UCP2_X"},
+	{0x00028E44, 0, 0, "PA_CL_UCP2_Y"},
+	{0x00028E48, 0, 0, "PA_CL_UCP2_Z"},
+	{0x00028E4C, 0, 0, "PA_CL_UCP2_W"},
+	{0x00028E50, 0, 0, "PA_CL_UCP3_X"},
+	{0x00028E54, 0, 0, "PA_CL_UCP3_Y"},
+	{0x00028E58, 0, 0, "PA_CL_UCP3_Z"},
+	{0x00028E5C, 0, 0, "PA_CL_UCP3_W"},
+	{0x00028E60, 0, 0, "PA_CL_UCP4_X"},
+	{0x00028E64, 0, 0, "PA_CL_UCP4_Y"},
+	{0x00028E68, 0, 0, "PA_CL_UCP4_Z"},
+	{0x00028E6C, 0, 0, "PA_CL_UCP4_W"},
+	{0x00028E70, 0, 0, "PA_CL_UCP5_X"},
+	{0x00028E74, 0, 0, "PA_CL_UCP5_Y"},
+	{0x00028E78, 0, 0, "PA_CL_UCP5_Z"},
+	{0x00028E7C, 0, 0, "PA_CL_UCP5_W"},
 };
 
 static const struct radeon_register R600_names_PS_RESOURCE[] = {
diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h
index 66ee5f2..af5319e 100644
--- a/src/gallium/winsys/r600/drm/radeon_priv.h
+++ b/src/gallium/winsys/r600/drm/radeon_priv.h
@@ -38,19 +38,19 @@ struct radeon_register {
 };
 
 struct radeon_sub_type {
-	int shader_type;
-	const struct radeon_register *regs;
-	unsigned nstates;
+	int				shader_type;
+	const struct radeon_register	*regs;
+	unsigned			nstates;
 };
 
 struct radeon_stype_info {
-	unsigned stype;
-	unsigned num;
-	unsigned stride;
-	radeon_state_pm4_t pm4;
-	struct radeon_sub_type reginfo[R600_SHADER_MAX];
-	unsigned base_id;
-	unsigned npm4;
+	unsigned			stype;
+	unsigned			num;
+	unsigned			stride;
+	radeon_state_pm4_t		pm4;
+	struct radeon_sub_type		reginfo[R600_SHADER_MAX];
+	unsigned			base_id;
+	unsigned			npm4;
 };
 
 struct radeon {
diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c
index ef09fdf..d4e622c 100644
--- a/src/gallium/winsys/r600/drm/radeon_state.c
+++ b/src/gallium/winsys/r600/drm/radeon_state.c
@@ -80,15 +80,59 @@ struct radeon_state *radeon_state_shader(struct radeon *radeon, u32 stype, u32 i
 	state->refcount = 1;
 	state->npm4 = found->npm4;
 	state->nstates = found->reginfo[shader_index].nstates;
-	state->states = calloc(1, state->nstates * 4);
-	state->pm4 = calloc(1, found->npm4 * 4);
-	if (state->states == NULL || state->pm4 == NULL) {
-		radeon_state_decref(state);
-		return NULL;
-	}
 	return state;
 }
 
+int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type)
+{
+	struct radeon_stype_info *found = NULL;
+	int i, j, shader_index = -1;
+
+	if (state == NULL)
+		return 0;
+	/* traverse the stype array */
+	for (i = 0; i < state->radeon->nstype; i++) {
+		/* if the type doesn't match, if the shader doesn't match */
+		if (stype != state->radeon->stype[i].stype)
+			continue;
+		if (shader_type) {
+			for (j = 0; j < 4; j++) {
+				if (state->radeon->stype[i].reginfo[j].shader_type == shader_type) {
+					shader_index = j;
+					break;
+				}
+			}
+			if (shader_index == -1)
+				continue;
+		} else {
+			if (state->radeon->stype[i].reginfo[0].shader_type)
+				continue;
+			else
+				shader_index = 0;
+		}
+		if (id > state->radeon->stype[i].num)
+			continue;
+		
+		found = &state->radeon->stype[i];
+		break;
+	}
+
+	if (!found) {
+		fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type);
+		return -EINVAL;
+	}
+
+	if (found->reginfo[shader_index].nstates != state->nstates) {
+		fprintf(stderr, "invalid type change from (%d %d %d) to (%d %d %d)\n",
+			state->stype->stype, state->id, state->shader_index, stype, id, shader_index);
+	}
+
+	state->stype = found;
+	state->id = id;
+	state->shader_index = shader_index;
+	return radeon_state_pm4(state);
+}
+
 struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id)
 {
 	return radeon_state_shader(radeon, type, id, 0);
@@ -134,9 +178,6 @@ struct radeon_state *radeon_state_decref(struct radeon_state *state)
 	for (i = 0; i < state->nbo; i++) {
 		state->bo[i] = radeon_bo_decref(state->radeon, state->bo[i]);
 	}
-	free(state->immd);
-	free(state->states);
-	free(state->pm4);
 	memset(state, 0, sizeof(*state));
 	free(state);
 	return NULL;
@@ -179,8 +220,9 @@ int radeon_state_pm4(struct radeon_state *state)
 {
 	int r;
 
-	if (state == NULL || state->cpm4)
+	if (state == NULL)
 		return 0;
+	state->cpm4 = 0;
 	r = state->stype->pm4(state);
 	if (r) {
 		fprintf(stderr, "%s failed to build PM4 for state(%d %d)\n",




More information about the mesa-commit mailing list