[Mesa-dev] [PATCH] r600g: implement texturing with 8x MSAA compressed surfaces for Evergreen

Marek Olšák maraeo at gmail.com
Fri Oct 12 10:11:34 PDT 2012


The 2x and 4x cases are completely broken. The ldfptr instruction returns
garbage there.

The 8x case is broken on Cayman, though at least the result looks somewhat
correct.

I think we're missing some info. Anyway, at least one case works.
---
 src/gallium/auxiliary/util/u_blitter.c       |    8 ++
 src/gallium/auxiliary/util/u_blitter.h       |    6 ++
 src/gallium/drivers/r600/evergreen_state.c   |   39 ++++++--
 src/gallium/drivers/r600/evergreend.h        |    2 +-
 src/gallium/drivers/r600/r600_asm.c          |   10 +-
 src/gallium/drivers/r600/r600_asm.h          |    7 +-
 src/gallium/drivers/r600/r600_blit.c         |   42 ++++----
 src/gallium/drivers/r600/r600_pipe.c         |   29 +++++-
 src/gallium/drivers/r600/r600_pipe.h         |   21 ++++
 src/gallium/drivers/r600/r600_shader.c       |  133 +++++++++++++++++++++++++-
 src/gallium/drivers/r600/r600_sq.h           |    3 +
 src/gallium/drivers/r600/r600_state.c        |    3 +-
 src/gallium/drivers/r600/r600_state_common.c |    4 +-
 13 files changed, 270 insertions(+), 37 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 1072a0e..49bde44 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -359,6 +359,14 @@ void util_blitter_destroy(struct blitter_context *blitter)
    FREE(ctx);
 }
 
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+                                          boolean supported)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+
+   ctx->has_texture_multisample = supported;
+}
+
 static void blitter_set_running_flag(struct blitter_context_priv *ctx)
 {
    if (ctx->base.running) {
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 4f71467..f75f81c 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -135,6 +135,12 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
    return blitter->pipe;
 }
 
+/**
+ * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver.
+ */
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+                                          boolean supported);
+
 /* The default function to draw a rectangle. This can only be used
  * inside of the draw_rectangle callback if the driver overrides it. */
 void util_blitter_draw_rectangle(struct blitter_context *blitter,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 330c021..967a4af 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -633,7 +633,7 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen,
 		return FALSE;
 
 	if (sample_count > 1) {
-		if (rscreen->info.drm_minor < 19)
+		if (!rscreen->has_msaa)
 			return FALSE;
 
 		switch (sample_count) {
@@ -1081,11 +1081,24 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 				       S_030004_TEX_DEPTH(depth - 1) |
 				       S_030004_ARRAY_MODE(array_mode));
 	view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
-	if (state->u.tex.last_level && texture->nr_samples <= 1) {
+
+	/* TEX_RESOURCE_WORD3.MIP_ADDRESS */
+	if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) {
+		/* XXX the 2x and 4x cases are broken. */
+		if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) {
+			/* disable FMASK (0 = disabled) */
+			view->tex_resource_words[3] = 0;
+			view->skip_mip_address_reloc = true;
+		} else {
+			/* FMASK should be in MIP_ADDRESS for multisample textures */
+			view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8;
+		}
+	} else if (state->u.tex.last_level && texture->nr_samples <= 1) {
 		view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
 	} else {
 		view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
 	}
+
 	view->tex_resource_words[4] = (word4 |
 				       S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 				       S_030010_ENDIAN_SWAP(endian));
@@ -1589,9 +1602,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 			rctx->framebuffer.export_16bpc = false;
 		}
 
-		/* Cayman can fetch from a compressed MSAA colorbuffer,
-		 * so it's pointless to track them. */
-		if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
+		if (rtex->fmask_size && rtex->cmask_size) {
 			rctx->framebuffer.compressed_cb_mask |= 1 << i;
 		}
 	}
@@ -2265,13 +2276,15 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 		r600_write_value(cs, (resource_id_base + resource_index) * 8);
 		r600_write_array(cs, 8, rview->tex_resource_words);
 
-		/* XXX The kernel needs two relocations. This is stupid. */
 		reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
 					      RADEON_USAGE_READ);
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
 		r600_write_value(cs, reloc);
-		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-		r600_write_value(cs, reloc);
+
+		if (!rview->skip_mip_address_reloc) {
+			r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+			r600_write_value(cs, reloc);
+		}
 	}
 	state->dirty_mask = 0;
 }
@@ -3364,6 +3377,16 @@ void *evergreen_create_decompress_blend(struct r600_context *rctx)
 	return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS);
 }
 
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx)
+{
+	struct pipe_blend_state blend;
+
+	memset(&blend, 0, sizeof(blend));
+	blend.independent_blend_enable = true;
+	blend.rt[0].colormask = 0xf;
+	return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_FMASK_DECOMPRESS);
+}
+
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
 {
 	struct pipe_depth_stencil_alpha_state dsa = {{0}};
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index d10ec7f..1c8646d 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -485,7 +485,7 @@
 #define      V_028808_CB_ELIMINATE_FAST_CLEAR          0x00000002
 #define      V_028808_CB_RESOLVE                       0x00000003
 #define      V_028808_CB_DECOMPRESS                    0x00000004
-#define      V_028808_CB_FASK_DECOMPRESS               0x00000005
+#define      V_028808_CB_FMASK_DECOMPRESS              0x00000005
 #define   S_028808_ROP3(x)                             (((x) & 0xFF) << 16)
 #define   G_028808_ROP3(x)                             (((x) >> 16) & 0xFF)
 #define   C_028808_ROP3                                0xFF00FFFF
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 066fb67..1fef3b0 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -255,7 +255,10 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
 	return tex;
 }
 
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family)
+void r600_bytecode_init(struct r600_bytecode *bc,
+			enum chip_class chip_class,
+			enum radeon_family family,
+			enum r600_msaa_texture_mode msaa_texture_mode)
 {
 	if ((chip_class == R600) &&
 	    (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
@@ -268,6 +271,7 @@ void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, en
 
 	LIST_INITHEAD(&bc->cf);
 	bc->chip_class = chip_class;
+	bc->msaa_texture_mode = msaa_texture_mode;
 }
 
 static int r600_bytecode_add_cf(struct r600_bytecode *bc)
@@ -1736,6 +1740,7 @@ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecod
 static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
 {
 	bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
+			     EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
 				S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
 				S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
 				S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
@@ -2766,7 +2771,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 	assert(count < 32);
 
 	memset(&bc, 0, sizeof(bc));
-	r600_bytecode_init(&bc, rctx->chip_class, rctx->family);
+	r600_bytecode_init(&bc, rctx->chip_class, rctx->family,
+			   rctx->screen->msaa_texture_support);
 
 	for (i = 0; i < count; i++) {
 		if (elements[i].instance_divisor > 1) {
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 8a9f318..2c7db2c 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -62,6 +62,7 @@ struct r600_bytecode_alu {
 struct r600_bytecode_tex {
 	struct list_head		list;
 	unsigned			inst;
+	unsigned			inst_mod;
 	unsigned			resource_id;
 	unsigned			src_gpr;
 	unsigned			src_rel;
@@ -195,6 +196,7 @@ struct r600_cf_callstack {
 
 struct r600_bytecode {
 	enum chip_class			chip_class;
+	enum r600_msaa_texture_mode	msaa_texture_mode;
 	int				type;
 	struct list_head		cf;
 	struct r600_bytecode_cf		*cf_last;
@@ -219,7 +221,10 @@ struct r600_bytecode {
 int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
 
 /* r600_asm.c */
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family);
+void r600_bytecode_init(struct r600_bytecode *bc,
+			enum chip_class chip_class,
+			enum radeon_family family,
+			enum r600_msaa_texture_mode msaa_texture_mode);
 void r600_bytecode_clear(struct r600_bytecode *bc);
 int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
 int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 8597b8d..a19248d 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -252,12 +252,29 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	unsigned layer, level, checked_last_layer, max_layer;
-
-	assert(rctx->chip_class != CAYMAN);
+	void *blend_decompress;
 
 	if (!rtex->dirty_level_mask)
 		return;
 
+	switch (rctx->screen->msaa_texture_support) {
+	case MSAA_TEXTURE_DECOMPRESSED:
+		blend_decompress = rctx->custom_blend_decompress;
+		break;
+	case MSAA_TEXTURE_COMPRESSED:
+		/* XXX the 2x and 4x cases are broken. */
+		if (rtex->resource.b.b.nr_samples == 8)
+			blend_decompress = rctx->custom_blend_fmask_decompress;
+		else
+			blend_decompress = rctx->custom_blend_decompress;
+		break;
+	case MSAA_TEXTURE_SAMPLE_ZERO:
+	default:
+		/* Nothing to do. */
+		rtex->dirty_level_mask = 0;
+		return;
+	}
+
 	for (level = first_level; level <= last_level; level++) {
 		if (!(rtex->dirty_level_mask & (1 << level)))
 			continue;
@@ -278,8 +295,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
 			cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
 
 			r600_blitter_begin(ctx, R600_DECOMPRESS);
-			util_blitter_custom_color(rctx->blitter, cbsurf,
-						  rctx->custom_blend_decompress);
+			util_blitter_custom_color(rctx->blitter, cbsurf, blend_decompress);
 			r600_blitter_end(ctx);
 
 			pipe_surface_reference(&cbsurf, NULL);
@@ -299,13 +315,6 @@ void r600_decompress_color_textures(struct r600_context *rctx,
 	unsigned i;
 	unsigned mask = textures->compressed_colortex_mask;
 
-	/* Cayman cannot decompress an MSAA colorbuffer,
-	 * but it can read it compressed, so skip this. */
-	assert(rctx->chip_class != CAYMAN);
-	if (rctx->chip_class == CAYMAN) {
-		return;
-	}
-
 	while (mask) {
 		struct pipe_sampler_view *view;
 		struct r600_texture *tex;
@@ -333,7 +342,6 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
 					unsigned level,
 					unsigned first_layer, unsigned last_layer)
 {
-	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_texture *rtex = (struct r600_texture*)tex;
 
 	if (rtex->is_depth && !rtex->is_flushing_texture) {
@@ -344,7 +352,7 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
 					   level, level,
 					   first_layer, last_layer,
 					   0, u_max_sample(tex));
-	} else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
+	} else if (rtex->fmask_size && rtex->cmask_size) {
 		r600_blit_decompress_color(ctx, rtex, level, level,
 					   first_layer, last_layer);
 	}
@@ -459,6 +467,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 	struct pipe_sampler_view src_templ, *src_view;
 	unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL;
 	struct pipe_box sbox;
+	bool copy_all_samples;
 
 	/* Handle buffers first. */
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -558,16 +567,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 							   src_widthFL, src_heightFL);
 	}
 
+	copy_all_samples = rctx->screen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
+
 	/* Copy. */
-	/* XXX Multisample texturing is unimplemented on Cayman. In the meantime,
-	 * copy only the first sample (which is the only one that is uncompressed
-	 * and therefore doesn't return garbage). */
 	r600_blitter_begin(ctx, R600_COPY_TEXTURE);
 	util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty,
 				  abs(src_box->width), abs(src_box->height),
 				  src_view, src_box, src_width0, src_height0,
 				  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
-				  rctx->chip_class != CAYMAN);
+				  copy_all_samples);
 	r600_blitter_end(ctx);
 
 	pipe_surface_reference(&dst_view, NULL);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 5454414..90891c2 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -171,6 +171,9 @@ static void r600_destroy_context(struct pipe_context *context)
 	if (rctx->custom_blend_decompress) {
 		rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress);
 	}
+	if (rctx->custom_blend_fmask_decompress) {
+		rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_fmask_decompress);
+	}
 	util_unreference_framebuffer_state(&rctx->framebuffer.state);
 
 	r600_context_fini(rctx);
@@ -264,6 +267,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 		rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
 		rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx);
 		rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx);
+		rctx->custom_blend_fmask_decompress = evergreen_create_fmask_decompress_blend(rctx);
 		rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR ||
 					   rctx->family == CHIP_PALM ||
 					   rctx->family == CHIP_SUMO ||
@@ -289,6 +293,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 	rctx->blitter = util_blitter_create(&rctx->context);
 	if (rctx->blitter == NULL)
 		goto fail;
+	util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
 	rctx->blitter->draw_rectangle = r600_draw_rectangle;
 
 	r600_begin_new_cs(rctx);
@@ -393,7 +398,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_COMPUTE:
 	case PIPE_CAP_START_INSTANCE:
 	case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
-        case PIPE_CAP_TEXTURE_MULTISAMPLE:
 		return 1;
 
 	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -402,6 +406,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
 		return 130;
 
+	case PIPE_CAP_TEXTURE_MULTISAMPLE:
+		return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
+
 	/* Supported except the original R600. */
 	case PIPE_CAP_INDEP_BLEND_ENABLE:
 	case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -950,6 +957,26 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 		break;
 	}
 
+	/* MSAA support. */
+	switch (rscreen->chip_class) {
+	case R600:
+	case R700:
+		rscreen->has_msaa = rscreen->info.drm_minor >= 22;
+		rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED;
+		break;
+	case EVERGREEN:
+		rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+		rscreen->msaa_texture_support =
+			rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED :
+							MSAA_TEXTURE_DECOMPRESSED;
+		break;
+	case CAYMAN:
+		rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+		/* We should be able to read compressed MSAA textures, but it doesn't work. */
+		rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO;
+		break;
+	}
+
 	if (r600_init_tiling(rscreen)) {
 		FREE(rscreen);
 		return NULL;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 578cbbe..c865b2e 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -183,6 +183,22 @@ struct r600_pipe_fences {
 	pipe_mutex			mutex;
 };
 
+enum r600_msaa_texture_mode {
+	/* If the hw can fetch the first sample only (no decompression available).
+	 * This means MSAA texturing is not fully implemented. */
+	MSAA_TEXTURE_SAMPLE_ZERO,
+
+	/* If the hw can fetch decompressed MSAA textures.
+	 * Supported families: R600, R700, Evergreen.
+	 * Cayman cannot use this, because it cannot do the decompression. */
+	MSAA_TEXTURE_DECOMPRESSED,
+
+	/* If the hw can fetch compressed MSAA textures, which means shaders can
+	 * read resolved FMASK. This yields the best performance.
+	 * Supported families: Evergreen, Cayman. */
+	MSAA_TEXTURE_COMPRESSED
+};
+
 struct r600_screen {
 	struct pipe_screen		screen;
 	struct radeon_winsys		*ws;
@@ -190,6 +206,8 @@ struct r600_screen {
 	enum chip_class			chip_class;
 	struct radeon_info		info;
 	bool				has_streamout;
+	bool				has_msaa;
+	enum r600_msaa_texture_mode	msaa_texture_support;
 	struct r600_tiling_info		tiling_info;
 	struct r600_pipe_fences		fences;
 
@@ -204,6 +222,7 @@ struct r600_pipe_sampler_view {
 	struct pipe_sampler_view	base;
 	struct r600_resource		*tex_resource;
 	uint32_t			tex_resource_words[8];
+	bool				skip_mip_address_reloc;
 };
 
 struct r600_rasterizer_state {
@@ -371,6 +390,7 @@ struct r600_context {
 	void				*custom_dsa_flush;
 	void				*custom_blend_resolve;
 	void				*custom_blend_decompress;
+	void				*custom_blend_fmask_decompress;
 	/* With rasterizer discard, there doesn't have to be a pixel shader.
 	 * In that case, we bind this one: */
 	void				*dummy_pixel_shader;
@@ -520,6 +540,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
 void *evergreen_create_resolve_blend(struct r600_context *rctx);
 void *evergreen_create_decompress_blend(struct r600_context *rctx);
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx);
 boolean evergreen_is_format_supported(struct pipe_screen *screen,
 				      enum pipe_format format,
 				      enum pipe_texture_target target,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 053a988..9cd5eee 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1178,7 +1178,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 	ctx.shader = shader;
 	ctx.native_integers = true;
 
-	r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family);
+	r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
+			   rscreen->msaa_texture_support);
 	ctx.tokens = tokens;
 	tgsi_scan_shader(tokens, &ctx.info);
 	tgsi_parse_init(&ctx.parse, tokens);
@@ -3794,10 +3795,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	unsigned src_gpr;
 	int r, i, j;
 	int opcode;
+	bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED &&
+				    inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
+				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
 	/* Texture fetch instructions can only use gprs as source.
 	 * Also they cannot negate the source or take the absolute value */
-	const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
-                                             tgsi_tex_src_requires_loading(ctx, 0);
+	const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
+                                              tgsi_tex_src_requires_loading(ctx, 0)) ||
+					     read_compressed_msaa;
 	boolean src_loaded = FALSE;
 	unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
 	uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
@@ -4068,6 +4074,127 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		src_gpr = ctx->temp_reg;
 	}
 
+	/* Obtain the sample index for reading a compressed MSAA color texture.
+	 * To read the FMASK, we use the ldfptr instruction, which tells us
+	 * where the samples are stored.
+	 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
+	 * which is the identity mapping. Each nibble says which physical sample
+	 * should be fetched to get that sample.
+	 *
+	 * Assume src.z contains the sample index. It should be modified like this:
+	 *   src.z = (ldfptr() >> (src.z * 4)) & 0xF;
+	 * Then fetch the texel with src.
+	 */
+	if (read_compressed_msaa) {
+		unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 3 : 4;
+		unsigned temp = r600_get_temp(ctx);
+		assert(src_loaded);
+
+		/* temp.w = ldfptr() */
+		memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+		tex.inst = SQ_TEX_INST_LD;
+		tex.inst_mod = 1; /* to indicate this is ldfptr */
+		tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+		tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+		tex.src_gpr = src_gpr;
+		tex.dst_gpr = temp;
+		tex.dst_sel_x = 7; /* mask out these components */
+		tex.dst_sel_y = 7;
+		tex.dst_sel_z = 7;
+		tex.dst_sel_w = 0; /* store X */
+		tex.src_sel_x = 0;
+		tex.src_sel_y = 1;
+		tex.src_sel_z = 2;
+		tex.src_sel_w = 3;
+		tex.offset_x = offset_x;
+		tex.offset_y = offset_y;
+		tex.offset_z = offset_z;
+		r = r600_bytecode_add_tex(ctx->bc, &tex);
+		if (r)
+			return r;
+
+		/* temp.x = sample_index*4 */
+		if (ctx->bc->chip_class == CAYMAN) {
+			for (i = 0 ; i < 4; i++) {
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+				alu.inst = ctx->inst_info->r600_opcode;
+				alu.src[0].sel = src_gpr;
+				alu.src[0].chan = sample_chan;
+				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+				alu.src[1].value = 4;
+				alu.dst.sel = temp;
+				alu.dst.chan = i;
+				alu.dst.write = i == 0;
+				if (i == 3)
+					alu.last = 1;
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
+				if (r)
+					return r;
+			}
+		} else {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
+			alu.src[0].sel = src_gpr;
+			alu.src[0].chan = sample_chan;
+			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+			alu.src[1].value = 4;
+			alu.dst.sel = temp;
+			alu.dst.chan = 0;
+			alu.dst.write = 1;
+			alu.last = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+
+		/* sample_index = temp.w >> temp.x */
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT);
+		alu.src[0].sel = temp;
+		alu.src[0].chan = 3;
+		alu.src[1].sel = temp;
+		alu.src[1].chan = 0;
+		alu.dst.sel = src_gpr;
+		alu.dst.chan = sample_chan;
+		alu.dst.write = 1;
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+
+		/* sample_index & 0xF */
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
+		alu.src[0].sel = src_gpr;
+		alu.src[0].chan = sample_chan;
+		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+		alu.src[1].value = 0xF;
+		alu.dst.sel = src_gpr;
+		alu.dst.chan = sample_chan;
+		alu.dst.write = 1;
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+#if 0
+		/* visualize the FMASK */
+		for (i = 0; i < 4; i++) {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+			alu.src[0].sel = src_gpr;
+			alu.src[0].chan = sample_chan;
+			alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+			alu.dst.chan = i;
+			alu.dst.write = 1;
+			alu.last = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+		return 0;
+#endif
+	}
+
 	opcode = ctx->inst_info->r600_opcode;
 	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
 	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 4b2a19a..587f88d 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -375,6 +375,9 @@
 #define   S_SQ_TEX_WORD0_BC_FRAC_MODE(x)                             (((x) & 0x1) << 5)
 #define   G_SQ_TEX_WORD0_BC_FRAC_MODE(x)                             (((x) >> 5) & 0x1)
 #define   C_SQ_TEX_WORD0_BC_FRAC_MODE                                0xFFFFFFDF
+#define   EG_S_SQ_TEX_WORD0_INST_MOD(x)                                 (((x) & 0x3) << 5)
+#define   EG_G_SQ_TEX_WORD0_INST_MOD(x)                                 (((x) >> 5) & 0x3)
+#define   EG_C_SQ_TEX_WORD0_INST_MOD                                    0xFFFFFF9F
 #define   S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x)                         (((x) & 0x1) << 7)
 #define   G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x)                         (((x) >> 7) & 0x1)
 #define   C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD                            0xFFFFFF7F
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 7ae4558..175287c 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -585,7 +585,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
 		return FALSE;
 
 	if (sample_count > 1) {
-		if (rscreen->info.drm_minor < 22)
+		if (!rscreen->has_msaa)
 			return FALSE;
 
 		/* R11G11B10 is broken on R6xx. */
@@ -1994,7 +1994,6 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
 		r600_write_value(cs, (resource_id_base + resource_index) * 7);
 		r600_write_array(cs, 7, rview->tex_resource_words);
 
-		/* XXX The kernel needs two relocations. This is stupid. */
 		reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
 					      RADEON_USAGE_READ);
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index ef18f6b..0b423be 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -593,8 +593,8 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
 				dst->views.compressed_depthtex_mask &= ~(1 << i);
 			}
 
-			/* Track compressed colorbuffers for Evergreen (Cayman doesn't need this). */
-			if (rctx->chip_class != CAYMAN && rtex->cmask_size && rtex->fmask_size) {
+			/* Track compressed colorbuffers. */
+			if (rtex->cmask_size && rtex->fmask_size) {
 				dst->views.compressed_colortex_mask |= 1 << i;
 			} else {
 				dst->views.compressed_colortex_mask &= ~(1 << i);
-- 
1.7.9.5



More information about the mesa-dev mailing list