Mesa (master): r600g: mipmap early support + EX2/ABS instruction + culling

Jerome Glisse glisse at kemper.freedesktop.org
Thu Jul 29 23:07:22 UTC 2010


Module: Mesa
Branch: master
Commit: 7a73390f9126fd270d9891cd9d2bf38ef56d9b80
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a73390f9126fd270d9891cd9d2bf38ef56d9b80

Author: Jerome Glisse <jglisse at redhat.com>
Date:   Thu Jul 29 14:51:06 2010 -0400

r600g: mipmap early support + EX2/ABS instruction + culling

Add mipmap support (demos/src/redbook/mipmap is working).
Add EX2/ABS shader instruction support.
Add face culling support.

Misc fixes.

Signed-off-by: Jerome Glisse <jglisse at redhat.com>

---

 src/gallium/drivers/r600/r600_asm.c      |    2 +
 src/gallium/drivers/r600/r600_resource.h |    4 +-
 src/gallium/drivers/r600/r600_shader.c   |   16 +++++---
 src/gallium/drivers/r600/r600_state.c    |   56 +++++++++++++++++++++++-------
 src/gallium/drivers/r600/r600_texture.c  |   24 ++++++-------
 src/gallium/drivers/r600/r600d.h         |   40 +++++++++++++++++++++
 6 files changed, 108 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index e678a2f..e560f65 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -294,6 +294,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 					S_SQ_ALU_WORD0_LAST(alu->last);
 		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
 					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
 					S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
 					S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
 					S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
@@ -309,6 +310,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 					S_SQ_ALU_WORD0_LAST(alu->last);
 		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
 					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
 					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
 					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
 					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 0139a3b..bb90e76 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -44,9 +44,9 @@ struct r600_resource_texture {
 	struct r600_resource		resource;
 	unsigned long			offset[PIPE_MAX_TEXTURE_LEVELS];
 	unsigned long			pitch[PIPE_MAX_TEXTURE_LEVELS];
-	unsigned long			stride[PIPE_MAX_TEXTURE_LEVELS];
 	unsigned long			layer_size[PIPE_MAX_TEXTURE_LEVELS];
-	unsigned long			stride_override;
+	unsigned long			pitch_override;
+	unsigned long			bpt;
 	unsigned long			size;
 };
 
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 3f1979b..c61cc11 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -249,10 +249,6 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
 		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
 		return -EINVAL;
 	}
-	if (i->Instruction.Saturate) {
-		R600_ERR("staturate unsupported\n");
-		return -EINVAL;
-	}
 	if (i->Instruction.Predicate) {
 		R600_ERR("predicate unsupported\n");
 		return -EINVAL;
@@ -507,10 +503,15 @@ static int tgsi_dst(struct r600_shader_ctx *ctx,
 			unsigned swizzle,
 			struct r600_bc_alu_dst *r600_dst)
 {
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+
 	r600_dst->sel = tgsi_dst->Register.Index;
 	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
 	r600_dst->chan = swizzle;
 	r600_dst->write = 1;
+	if (inst->Instruction.Saturate) {
+		r600_dst->clamp = 1;
+	}
 	return 0;
 }
 
@@ -540,6 +541,9 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
 		case TGSI_OPCODE_SUB:
 			alu.src[1].neg = 1;
 			break;
+		case TGSI_OPCODE_ABS:
+			alu.src[0].abs = 1;
+			break;
 		default:
 			break;
 		}
@@ -1040,13 +1044,13 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans},
 	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	/* gap */
 	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
 	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 57879e8..0191070 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -24,6 +24,7 @@
  *      Jerome Glisse
  */
 #include <stdio.h>
+#include <errno.h>
 #include "util/u_inlines.h"
 #include "util/u_format.h"
 #include "util/u_memory.h"
@@ -649,8 +650,8 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx)
 	rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
 	rstate->placement[4] = RADEON_GEM_DOMAIN_GTT;
 	rstate->nbo = 3;
-	pitch = rtex->pitch[level] / 8 - 1;
-	slice = rtex->pitch[level] * state->cbufs[0]->height / 64 - 1;
+	pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
+	slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1;
 	rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000;
 	rstate->states[R600_CB0__CB_COLOR0_INFO] = 0x08110068;
 	rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) |
@@ -666,6 +667,22 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx)
 	return rstate;
 }
 
+int r600_db_format(unsigned pformat, unsigned *format)
+{
+	switch (pformat) {
+	case PIPE_FORMAT_Z24X8_UNORM:
+		*format = V_028010_DEPTH_X8_24;
+		return 0;
+	case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+		*format = V_028010_DEPTH_8_24;
+		return 0;
+	default:
+		*format = V_028010_DEPTH_INVALID;
+		R600_ERR("unsupported %d\n", pformat);
+		return -EINVAL;
+	}
+}
+
 static struct radeon_state *r600_db(struct r600_context *rctx)
 {
 	struct r600_screen *rscreen = rctx->screen;
@@ -674,7 +691,7 @@ static struct radeon_state *r600_db(struct r600_context *rctx)
 	struct radeon_state *rstate;
 	const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
 	unsigned level = state->cbufs[0]->level;
-	unsigned pitch, slice;
+	unsigned pitch, slice, format;
 
 	if (state->zsbuf == NULL)
 		return NULL;
@@ -689,10 +706,15 @@ static struct radeon_state *r600_db(struct r600_context *rctx)
 	rstate->nbo = 1;
 	rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
 	level = state->zsbuf->level;
-	pitch = rtex->pitch[level] / 8 - 1;
-	slice = rtex->pitch[level] * state->zsbuf->height / 64 - 1;
+	pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
+	slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
+	if (r600_db_format(state->zsbuf->texture->format, &format)) {
+		radeon_state_decref(rstate);
+		return NULL;
+	}
 	rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000;
-	rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010006;
+	rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 |
+					S_028010_FORMAT(format);
 	rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000;
 	rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1;
 	rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) |
@@ -716,7 +738,10 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
 		return NULL;
 	rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001;
 	rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000;
-	rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000;
+	rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 |
+			S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
+			S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
+			S_028814_FACE(!state->front_ccw);
 	rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000;
 	rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000;
 	rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008;
@@ -910,6 +935,11 @@ static inline unsigned r600_tex_compare(unsigned compare)
 	}
 }
 
+static INLINE u32 S_FIXED(float value, u32 frac_bits)
+{
+	return value * (1 << frac_bits);
+}
+
 static struct radeon_state *r600_sampler(struct r600_context *rctx,
 				const struct pipe_sampler_state *state,
 				unsigned id)
@@ -930,9 +960,9 @@ static struct radeon_state *r600_sampler(struct r600_context *rctx,
 			S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func));
 	/* FIXME LOD it depends on texture base level ... */
 	rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] =
-			S_03C004_MIN_LOD(0) |
-			S_03C004_MAX_LOD(0) |
-			S_03C004_LOD_BIAS(0);
+			S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
+			S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
+			S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6));
 	rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1);
 	if (radeon_state_pm4(rstate)) {
 		radeon_state_decref(rstate);
@@ -1020,7 +1050,7 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
 	/* FIXME properly handle first level != 0 */
 	rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] =
 			S_038000_DIM(r600_tex_dim(view->texture->target)) |
-			S_038000_PITCH((tmp->pitch[0] / 8) - 1) |
+			S_038000_PITCH(((tmp->pitch[0] / tmp->bpt) / 8) - 1) |
 			S_038000_TEX_WIDTH(view->texture->width0 - 1);
 	rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] =
 			S_038004_TEX_HEIGHT(view->texture->height0 - 1) |
@@ -1036,9 +1066,9 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
 			S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
 			S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
 			S_038010_REQUEST_SIZE(1) |
-			S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_r)) |
+			S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_b)) |
 			S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) |
-			S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_b)) |
+			S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) |
 			S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) |
 			S_038010_BASE_LEVEL(view->first_level);
 	rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] =
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index ab20e97..96173b0 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -59,24 +59,22 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex,
 static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource_texture *rtex)
 {
 	struct pipe_resource *ptex = &rtex->resource.base.b;
-	unsigned long w, h, stride, size, layer_size, i, offset;
+	unsigned long w, h, pitch, size, layer_size, i, offset;
 
+	rtex->bpt = util_format_get_blocksize(ptex->format);
 	for (i = 0, offset = 0; i <= ptex->last_level; i++) {
 		w = u_minify(ptex->width0, i);
 		h = u_minify(ptex->height0, i);
-		stride = align(util_format_get_stride(ptex->format, w), 32);
-		layer_size = stride * h;
+		pitch = util_format_get_stride(ptex->format, align(w, 64));
+		layer_size = pitch * h;
 		if (ptex->target == PIPE_TEXTURE_CUBE)
 			size = layer_size * 6;
 		else
 			size = layer_size * u_minify(ptex->depth0, i);
 		rtex->offset[i] = offset;
 		rtex->layer_size[i] = layer_size;
-		rtex->pitch[i] = stride / util_format_get_blocksize(ptex->format);
-		rtex->pitch[i] += R600_TEXEL_PITCH_ALIGNMENT_MASK;
-		rtex->pitch[i] &= ~R600_TEXEL_PITCH_ALIGNMENT_MASK;
-		rtex->stride[i] = stride;
-		offset += align(size, 32);
+		rtex->pitch[i] = pitch;
+		offset += size;
 	}
 	rtex->size = offset;
 }
@@ -183,11 +181,11 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
 	pipe_reference_init(&resource->base.b.reference, 1);
 	resource->base.b.screen = screen;
 	resource->bo = bo;
-	rtex->stride_override = whandle->stride;
-	rtex->pitch[0] = whandle->stride / util_format_get_blocksize(templ->format);
-	rtex->stride[0] = whandle->stride;
+	rtex->pitch_override = whandle->stride;
+	rtex->bpt = util_format_get_blocksize(templ->format);
+	rtex->pitch[0] = whandle->stride;
 	rtex->offset[0] = 0;
-	rtex->size = align(rtex->stride[0] * templ->height0, 32);
+	rtex->size = align(rtex->pitch[0] * templ->height0, 64);
 
 	return &resource->base.b;
 }
@@ -216,7 +214,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
 	trans->transfer.sr = sr;
 	trans->transfer.usage = usage;
 	trans->transfer.box = *box;
-	trans->transfer.stride = rtex->stride[sr.level];
+	trans->transfer.stride = rtex->pitch[sr.level];
 	trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
 	return &trans->transfer;
 }
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 593b95c..c1acfcd 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -316,6 +316,46 @@
 #define   S_028010_ZRANGE_PRECISION(x)                 (((x) & 0x1) << 31)
 #define   G_028010_ZRANGE_PRECISION(x)                 (((x) >> 31) & 0x1)
 #define   C_028010_ZRANGE_PRECISION                    0x7FFFFFFF
+#define R_028814_PA_SU_SC_MODE_CNTL                  0x028814
+#define   S_028814_CULL_FRONT(x)                       (((x) & 0x1) << 0)
+#define   G_028814_CULL_FRONT(x)                       (((x) >> 0) & 0x1)
+#define   C_028814_CULL_FRONT                          0xFFFFFFFE
+#define   S_028814_CULL_BACK(x)                        (((x) & 0x1) << 1)
+#define   G_028814_CULL_BACK(x)                        (((x) >> 1) & 0x1)
+#define   C_028814_CULL_BACK                           0xFFFFFFFD
+#define   S_028814_FACE(x)                             (((x) & 0x1) << 2)
+#define   G_028814_FACE(x)                             (((x) >> 2) & 0x1)
+#define   C_028814_FACE                                0xFFFFFFFB
+#define   S_028814_POLY_MODE(x)                        (((x) & 0x3) << 3)
+#define   G_028814_POLY_MODE(x)                        (((x) >> 3) & 0x3)
+#define   C_028814_POLY_MODE                           0xFFFFFFE7
+#define   S_028814_POLYMODE_FRONT_PTYPE(x)             (((x) & 0x7) << 5)
+#define   G_028814_POLYMODE_FRONT_PTYPE(x)             (((x) >> 5) & 0x7)
+#define   C_028814_POLYMODE_FRONT_PTYPE                0xFFFFFF1F
+#define   S_028814_POLYMODE_BACK_PTYPE(x)              (((x) & 0x7) << 8)
+#define   G_028814_POLYMODE_BACK_PTYPE(x)              (((x) >> 8) & 0x7)
+#define   C_028814_POLYMODE_BACK_PTYPE                 0xFFFFF8FF
+#define   S_028814_POLY_OFFSET_FRONT_ENABLE(x)         (((x) & 0x1) << 11)
+#define   G_028814_POLY_OFFSET_FRONT_ENABLE(x)         (((x) >> 11) & 0x1)
+#define   C_028814_POLY_OFFSET_FRONT_ENABLE            0xFFFFF7FF
+#define   S_028814_POLY_OFFSET_BACK_ENABLE(x)          (((x) & 0x1) << 12)
+#define   G_028814_POLY_OFFSET_BACK_ENABLE(x)          (((x) >> 12) & 0x1)
+#define   C_028814_POLY_OFFSET_BACK_ENABLE             0xFFFFEFFF
+#define   S_028814_POLY_OFFSET_PARA_ENABLE(x)          (((x) & 0x1) << 13)
+#define   G_028814_POLY_OFFSET_PARA_ENABLE(x)          (((x) >> 13) & 0x1)
+#define   C_028814_POLY_OFFSET_PARA_ENABLE             0xFFFFDFFF
+#define   S_028814_VTX_WINDOW_OFFSET_ENABLE(x)         (((x) & 0x1) << 16)
+#define   G_028814_VTX_WINDOW_OFFSET_ENABLE(x)         (((x) >> 16) & 0x1)
+#define   C_028814_VTX_WINDOW_OFFSET_ENABLE            0xFFFEFFFF
+#define   S_028814_PROVOKING_VTX_LAST(x)               (((x) & 0x1) << 19)
+#define   G_028814_PROVOKING_VTX_LAST(x)               (((x) >> 19) & 0x1)
+#define   C_028814_PROVOKING_VTX_LAST                  0xFFF7FFFF
+#define   S_028814_PERSP_CORR_DIS(x)                   (((x) & 0x1) << 20)
+#define   G_028814_PERSP_CORR_DIS(x)                   (((x) >> 20) & 0x1)
+#define   C_028814_PERSP_CORR_DIS                      0xFFEFFFFF
+#define   S_028814_MULTI_PRIM_IB_ENA(x)                (((x) & 0x1) << 21)
+#define   G_028814_MULTI_PRIM_IB_ENA(x)                (((x) >> 21) & 0x1)
+#define   C_028814_MULTI_PRIM_IB_ENA                   0xFFDFFFFF
 #define R_028000_DB_DEPTH_SIZE                       0x028000
 #define   S_028000_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
 #define   G_028000_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)




More information about the mesa-commit mailing list