[Mesa-dev] [PATCH] r600g: Implement GL_ARB_sample_shading

Glenn Kennard glenn.kennard at gmail.com
Sun Aug 31 15:02:18 PDT 2014


Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
---
Tested on a Radeon 6670: all sample shading piglits pass with no
regressions. Also tested with Unigine Valley Basic and Tesseract with
MSAA enabled.

It would be great if one or more people could test this
on pre-evergreen hardware, and cayman, since I don't
have any such hardware to test with.

Added a comment on a pre-existing bug discovered while
implementing sample shading where driver const buffers
can alias user provided ones.

 docs/GL3.txt                                 |   4 +-
 docs/relnotes/10.4.html                      |  62 ++++++
 src/gallium/drivers/r600/evergreen_state.c   | 104 ++++++---
 src/gallium/drivers/r600/evergreend.h        |   3 +
 src/gallium/drivers/r600/r600_pipe.c         |   2 +-
 src/gallium/drivers/r600/r600_pipe.h         |   9 +
 src/gallium/drivers/r600/r600_shader.c       | 305 +++++++++++++++++++++------
 src/gallium/drivers/r600/r600_shader.h       |   7 +-
 src/gallium/drivers/r600/r600_state.c        |  43 +++-
 src/gallium/drivers/r600/r600d.h             |   3 +
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp |  21 +-
 11 files changed, 444 insertions(+), 119 deletions(-)
 create mode 100644 docs/relnotes/10.4.html

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 56c4994..5baacc1 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -104,13 +104,13 @@ GL 4.0, GLSL 4.00:
   - Fused multiply-add                                 DONE ()
   - Packing/bitfield/conversion functions              DONE (r600)
   - Enhanced textureGather                             DONE (r600, radeonsi)
-  - Geometry shader instancing                         DONE ()
+  - Geometry shader instancing                         DONE (r600)
   - Geometry shader multiple streams                   DONE ()
   - Enhanced per-sample shading                        DONE (r600)
   - Interpolation functions                            DONE ()
   - New overload resolution rules                      DONE
   GL_ARB_gpu_shader_fp64                               started (Dave)
-  GL_ARB_sample_shading                                DONE (i965, nv50, nvc0, radeonsi)
+  GL_ARB_sample_shading                                DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_shader_subroutine                             not started
   GL_ARB_tessellation_shader                           started (Chris, Ilia)
   GL_ARB_texture_buffer_object_rgb32                   DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html
new file mode 100644
index 0000000..d56275d
--- /dev/null
+++ b/docs/relnotes/10.4.html
@@ -0,0 +1,62 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.4 Release Notes / TBD</h1>
+
+<p>
+Mesa 10.4 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 10.4.1.
+</p>
+<p>
+Mesa 10.4 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>GL_ARB_sample_shading on r600</li>
+</ul>
+
+
+<h2>Bug fixes</h2>
+
+TBD.
+
+<h2>Changes</h2>
+
+<ul>
+</ul>
+
+</div>
+</body>
+</html>
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 9f0e82d..9531893 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1398,7 +1398,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 
 	/* MSAA. */
 	if (rctx->b.chip_class == EVERGREEN)
-		rctx->framebuffer.atom.num_dw += 14; /* Evergreen */
+		rctx->framebuffer.atom.num_dw += 17; /* Evergreen */
 	else
 		rctx->framebuffer.atom.num_dw += 28; /* Cayman */
 
@@ -1418,8 +1418,36 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 	}
 
 	rctx->framebuffer.atom.dirty = true;
+
+	/* set sample xy locations as array of fragment shader constants */
+	{
+		struct pipe_constant_buffer constbuf = {0};
+		float values[4*16] = {0.0f};
+		int i;
+		assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+		for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
+			ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
+		}
+		constbuf.user_buffer = values;
+		constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
+		ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
+								 R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
+		pipe_resource_reference(&constbuf.buffer, NULL);
+	}
 }
 
+static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+
+	if (rctx->ps_iter_samples == min_samples)
+		return;
+
+	rctx->ps_iter_samples = min_samples;
+	if (rctx->framebuffer.nr_samples > 1) {
+		rctx->framebuffer.atom.dirty = true;
+	}
+}
 
 /* 8xMSAA */
 static uint32_t sample_locs_8x[] = {
@@ -1473,7 +1501,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx,
 	}
 }
 
-static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
+static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
 {
 
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
@@ -1506,10 +1534,12 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 				     S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
 		radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
 				     S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
+		r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
 	} else {
 		r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
 		radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
 		radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
+		r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
 	}
 }
 
@@ -1670,10 +1700,10 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
 
 	if (rctx->b.chip_class == EVERGREEN) {
-		evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples);
+		evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples);
 	} else {
 		cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples);
-		cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, 1);
+		cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, rctx->ps_iter_samples);
 	}
 }
 
@@ -2427,8 +2457,6 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
 		r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
 	}
 
-	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
-
 	/* The cs checker requires this register to be set. */
 	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
 
@@ -2781,11 +2809,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 	struct r600_command_buffer *cb = &shader->command_buffer;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0;
-	int pos_index = -1, face_index = -1;
+	int pos_index = -1, face_index = -1, fixed_pt_position_index = -1;
 	int ninterp = 0;
-	boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
-	unsigned spi_baryc_cntl, sid, tmp, num = 0;
-	unsigned z_export = 0, stencil_export = 0;
+	boolean have_perspective = FALSE, have_linear = FALSE;
+	static const unsigned spi_baryc_enable_bit[6] = {
+		S_0286E0_PERSP_SAMPLE_ENA(1),
+		S_0286E0_PERSP_CENTER_ENA(1),
+		S_0286E0_PERSP_CENTROID_ENA(1),
+		S_0286E0_LINEAR_SAMPLE_ENA(1),
+		S_0286E0_LINEAR_CENTER_ENA(1),
+		S_0286E0_LINEAR_CENTROID_ENA(1)
+	};
+	unsigned spi_baryc_cntl = 0, sid, tmp, num = 0;
+	unsigned z_export = 0, stencil_export = 0, mask_export = 0;
 	unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
 	uint32_t spi_ps_input_cntl[32];
 
@@ -2808,14 +2844,20 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 			if (face_index == -1)
 				face_index = i; /* lives in same register, same enable bit */
 		}
+		else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) {
+			if (fixed_pt_position_index == -1)
+				fixed_pt_position_index = i;
+		}
 		else {
 			ninterp++;
-			if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
-				have_linear = TRUE;
-			if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
-				have_perspective = TRUE;
-			if (rshader->input[i].centroid)
-				have_centroid = TRUE;
+			int k = eg_get_interpolator_index(
+				rshader->input[i].interpolate,
+				rshader->input[i].interpolate_location);
+			if (k >= 0) {
+				spi_baryc_cntl |= spi_baryc_enable_bit[k];
+				have_perspective |= k < 3;
+				have_linear |= !(k < 3);
+			}
 		}
 
 		sid = rshader->input[i].spi_sid;
@@ -2847,17 +2889,22 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 			z_export = 1;
 		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
 			stencil_export = 1;
+		if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK &&
+			rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0)
+			mask_export = 1;
 	}
 	if (rshader->uses_kill)
 		db_shader_control |= S_02880C_KILL_ENABLE(1);
 
 	db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
 	db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
+	db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
 
 	exports_ps = 0;
 	for (i = 0; i < rshader->noutput; i++) {
 		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
-		    rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+		    rshader->output[i].name == TGSI_SEMANTIC_STENCIL ||
+		    rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK)
 			exports_ps |= 1;
 	}
 
@@ -2873,6 +2920,8 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 		ninterp = 1;
 		have_perspective = TRUE;
 	}
+	if (!spi_baryc_cntl)
+		spi_baryc_cntl |= spi_baryc_enable_bit[0];
 
 	if (!have_perspective && !have_linear)
 		have_perspective = TRUE;
@@ -2883,7 +2932,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 	spi_input_z = 0;
 	if (pos_index != -1) {
 		spi_ps_in_control_0 |=  S_0286CC_POSITION_ENA(1) |
-			S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+			S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) |
 			S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
 		spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
 	}
@@ -2893,14 +2942,10 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
 			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
 	}
-
-	spi_baryc_cntl = 0;
-	if (have_perspective)
-		spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) |
-				  S_0286E0_PERSP_CENTROID_ENA(have_centroid);
-	if (have_linear)
-		spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) |
-				  S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
+	if (fixed_pt_position_index != -1) {
+		spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) |
+			S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr);
+	}
 
 	r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
 	r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
@@ -2919,7 +2964,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
 
 	shader->db_shader_control = db_shader_control;
-	shader->ps_depth_export = z_export | stencil_export;
+	shader->ps_depth_export = z_export | stencil_export | mask_export;
 
 	shader->sprite_coord_enable = sprite_coord_enable;
 	if (rctx->rasterizer)
@@ -2963,8 +3008,8 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
 
 	if (rctx->screen->b.info.drm_minor >= 35) {
 		r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
-				S_028B90_CNT(0) |
-				S_028B90_ENABLE(0));
+				S_028B90_CNT(MIN2(rshader->gs_num_invocations, 127)) |
+				S_028B90_ENABLE(rshader->gs_num_invocations > 0));
 	}
 	r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
 	r600_store_value(cb, cp_shader->ring_item_size >> 2);
@@ -3441,6 +3486,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	rctx->b.b.create_sampler_view = evergreen_create_sampler_view;
 	rctx->b.b.set_framebuffer_state = evergreen_set_framebuffer_state;
 	rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple;
+	rctx->b.b.set_min_samples = evergreen_set_min_samples;
 	rctx->b.b.set_scissor_states = evergreen_set_scissor_states;
 
 	if (rctx->b.chip_class == EVERGREEN)
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 784d495..4989996 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -803,6 +803,9 @@
 #define   S_02880C_KILL_ENABLE(x)                      (((x) & 0x1) << 6)
 #define   G_02880C_KILL_ENABLE(x)                      (((x) >> 6) & 0x1)
 #define   C_02880C_KILL_ENABLE                         0xFFFFFFBF
+#define   S_02880C_MASK_EXPORT_ENABLE(x)               (((x) & 0x1) << 8)
+#define   G_02880C_MASK_EXPORT_ENABLE(x)               (((x) >> 8) & 0x1)
+#define   C_02880C_MASK_EXPORT_ENABLE                  0XFFFFFEFF
 #define   S_02880C_DUAL_EXPORT_ENABLE(x)               (((x) & 0x1) << 9)
 #define   G_02880C_DUAL_EXPORT_ENABLE(x)               (((x) >> 9) & 0x1)
 #define   C_02880C_DUAL_EXPORT_ENABLE                  0xFFFFFDFF
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 566e724..01ea2a1 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -261,6 +261,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 	case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
 	case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+	case PIPE_CAP_SAMPLE_SHADING:
 		return 1;
 
 	case PIPE_CAP_COMPUTE:
@@ -316,7 +317,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
 	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
-	case PIPE_CAP_SAMPLE_SHADING:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_DRAW_INDIRECT:
 	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 8907d41..8152702 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -52,6 +52,14 @@
 #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 #define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
+/* Currently R600_MAX_CONST_BUFFERS is too large: the hardware only has 16 buffers, but the driver
+ * tries to use 17. Avoid accidentally aliasing with user UBOs for SAMPLE_POSITIONS by using an id < 16.
+ * UCP and SAMPLE_POSITIONS are never accessed by the same shader stage, so they can share an id.
+ *
+ * Fixing this properly would require the driver to combine its buffers into a single hardware buffer,
+ * which would also allow supporting the D3D11-mandated minimum of 15 user const buffers.
+ */
+#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 
 #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
 
@@ -452,6 +460,7 @@ struct r600_context {
 	bool				force_blend_disable;
 	boolean				dual_src_blend;
 	unsigned			zwritemask;
+	int					ps_iter_samples;
 
 	/* Index buffer. */
 	struct pipe_index_buffer	index_buffer;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 3f089b4..fd7d845 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -64,6 +64,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 				 struct r600_pipe_shader *pipeshader,
 				 struct r600_shader_key key);
 
+
 static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
                            int size, unsigned comp_mask) {
 
@@ -267,6 +268,11 @@ struct r600_shader_src {
 	uint32_t				value[4];
 };
 
+struct eg_interp {
+	boolean					enabled;
+	unsigned				ij_index;
+};
+
 struct r600_shader_ctx {
 	struct tgsi_shader_info			info;
 	struct tgsi_parse_context		parse;
@@ -283,13 +289,11 @@ struct r600_shader_ctx {
 	uint32_t				max_driver_temp_used;
 	boolean use_llvm;
 	/* needed for evergreen interpolation */
-	boolean                                 input_centroid;
-	boolean                                 input_linear;
-	boolean                                 input_perspective;
-	int					num_interp_gpr;
+	struct eg_interp		eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
 	/* evergreen/cayman also store sample mask in face register */
 	int					face_gpr;
-	boolean					has_samplemask;
+	/* sample id is .w component stored in fixed point position register */
+	int					fixed_pt_position_gpr;
 	int					colors_used;
 	boolean                 clip_vertex_write;
 	unsigned                cv_output;
@@ -320,6 +324,12 @@ static int tgsi_endif(struct r600_shader_ctx *ctx);
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
 static int tgsi_endloop(struct r600_shader_ctx *ctx);
 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
+                                unsigned int cb_idx, unsigned int offset, unsigned ar_chan,
+                                unsigned int dst_reg);
+static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
+			const struct r600_shader_src *shader_src,
+			unsigned chan);
 
 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
 {
@@ -364,27 +374,41 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
-static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
-		int input)
+int eg_get_interpolator_index(unsigned interpolate, unsigned location)
 {
-	int ij_index = 0;
+	if (interpolate == TGSI_INTERPOLATE_COLOR ||
+		interpolate == TGSI_INTERPOLATE_LINEAR ||
+		interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+	{
+		int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
+		int loc;
 
-	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
-		if (ctx->shader->input[input].centroid)
-			ij_index++;
-	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
-		/* if we have perspective add one */
-		if (ctx->input_perspective)  {
-			ij_index++;
-			/* if we have perspective centroid */
-			if (ctx->input_centroid)
-				ij_index++;
+		switch(location) {
+		case TGSI_INTERPOLATE_LOC_CENTER:
+			loc = 1;
+			break;
+		case TGSI_INTERPOLATE_LOC_CENTROID:
+			loc = 2;
+			break;
+		case TGSI_INTERPOLATE_LOC_SAMPLE:
+		default:
+			loc = 0; break;
 		}
-		if (ctx->shader->input[input].centroid)
-			ij_index++;
+
+		return is_linear * 3 + loc;
 	}
 
-	ctx->shader->input[input].ij_index = ij_index;
+	return -1;
+}
+
+static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
+		int input)
+{
+	int i = eg_get_interpolator_index(
+		ctx->shader->input[input].interpolate,
+		ctx->shader->input[input].interpolate_location);
+	assert(i >= 0);
+	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
 }
 
 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
@@ -582,14 +606,19 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 		ctx->shader->input[i].name = d->Semantic.Name;
 		ctx->shader->input[i].sid = d->Semantic.Index;
 		ctx->shader->input[i].interpolate = d->Interp.Interpolate;
-		ctx->shader->input[i].centroid = d->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID;
+		ctx->shader->input[i].interpolate_location = d->Interp.Location;
 		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
 		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
 			ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
 			switch (ctx->shader->input[i].name) {
 			case TGSI_SEMANTIC_FACE:
-				if (ctx->face_gpr == -1)
+				if (ctx->face_gpr == -1) {
 					ctx->face_gpr = ctx->shader->input[i].gpr;
+				}
+				else {
+					/* already assigned by SAMPLEMASK */
+					ctx->shader->input[i].gpr = ctx->face_gpr;
+				}
 				break;
 			case TGSI_SEMANTIC_COLOR:
 				ctx->colors_used++;
@@ -679,14 +708,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 		break;
 
 	case TGSI_FILE_SYSTEM_VALUE:
-		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK) {
-			ctx->has_samplemask = true;
-			/* lives in Front Face GPR */
-			if (ctx->face_gpr == -1)
-				ctx->face_gpr = ctx->file_offset[TGSI_FILE_SYSTEM_VALUE] + d->Range.First;
-			break;
-		}
-		else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
+			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
+			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
+			break; /* Handled in evergreen_gpr_count */
+		} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
 			if (!ctx->native_integers) {
 				struct r600_bytecode_alu alu;
 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -706,6 +732,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 			break;
 		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
 			break;
+		else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
+			break;
 	default:
 		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
 		return -EINVAL;
@@ -718,52 +746,168 @@ static int r600_get_temp(struct r600_shader_ctx *ctx)
 	return ctx->temp_reg + ctx->max_driver_temp_used++;
 }
 
+static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset,
+										boolean has_sampleid, boolean has_samplemaskin)
+{
+	struct {
+		boolean enabled;
+		int *reg;
+		unsigned name;
+	} inputs[2] = {
+		{ has_samplemaskin, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK }, /* lives in Front Face GPR.z */
+
+		{ has_sampleid, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID } /* SAMPLEID is in Fixed Point Position GPR.w */
+	};
+	int i, k, num_regs = 0;
+
+	for (i = 0; i < Elements(inputs); i++) {
+		boolean enabled = inputs[i].enabled;
+		int *reg = inputs[i].reg;
+		unsigned name = inputs[i].name;
+
+		if (enabled) {
+			int gpr = gpr_offset + num_regs++;
+
+			// add to inputs, allocate a gpr if needed
+			k = ctx->shader->ninput ++;
+			ctx->shader->input[k].name = name;
+			ctx->shader->input[k].sid = 0;
+			ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
+			ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
+			*reg = ctx->shader->input[k].gpr = gpr;
+		}
+	}
+
+	return num_regs;
+}
+
 /*
  * for evergreen we need to scan the shader to find the number of GPRs we need to
- * reserve for interpolation.
+ * reserve for interpolation and system values
  *
  * we need to know if we are going to emit
- * any centroid inputs
+ * any sample or centroid inputs
  * if perspective and linear are required
 */
 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
 {
 	int i;
 	int num_baryc;
+	struct tgsi_parse_context parse;
+	boolean has_sampleid, has_samplemaskin;
 
-	ctx->input_linear = FALSE;
-	ctx->input_perspective = FALSE;
-	ctx->input_centroid = FALSE;
-	ctx->num_interp_gpr = 1;
+	memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));
 
-	/* any centroid inputs */
 	for (i = 0; i < ctx->info.num_inputs; i++) {
-		/* skip position/face */
+		int k;
+		/* skip position/face/mask/sampleid */
 		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
 		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE ||
-		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK)
+		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK ||
+		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID)
 			continue;
-		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
-			ctx->input_linear = TRUE;
-		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
-			ctx->input_perspective = TRUE;
-		if (ctx->info.input_interpolate_loc[i] == TGSI_INTERPOLATE_LOC_CENTROID)
-			ctx->input_centroid = TRUE;
+
+		k = eg_get_interpolator_index(
+			ctx->info.input_interpolate[i],
+			ctx->info.input_interpolate_loc[i]);
+		if (k >= 0)
+			ctx->eg_interpolators[k].enabled = TRUE;
+	}
+
+	/* Need to scan shader for system values */
+	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
+		return 0;
+	}
+
+	has_sampleid = false;
+	has_samplemaskin = false;
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		tgsi_parse_token(&parse);
+
+		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
+			struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
+			if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
+				if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK) {
+					has_samplemaskin = true;
+				}
+				else if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
+						 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
+					has_sampleid = true;
+				}
+			}
+		}
 	}
 
+	tgsi_parse_free(&parse);
+
+	/* assign gpr to each interpolator according to priority */
 	num_baryc = 0;
-	/* ignoring sample for now */
-	if (ctx->input_perspective)
-		num_baryc++;
-	if (ctx->input_linear)
-		num_baryc++;
-	if (ctx->input_centroid)
-		num_baryc *= 2;
-
-	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
-
-	/* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
-	return ctx->num_interp_gpr;
+	for (i = 0; i < Elements(ctx->eg_interpolators); i++) {
+		if (ctx->eg_interpolators[i].enabled) {
+			ctx->eg_interpolators[i].ij_index = num_baryc;
+			num_baryc ++;
+		}
+	}
+
+	/* XXX PULL MODEL and LINE STIPPLE */
+
+	num_baryc = (num_baryc + 1) >> 1;
+	return num_baryc + allocate_system_value_inputs(ctx, num_baryc, has_sampleid, has_samplemaskin);
+}
+
+/* sample_id == NULL means fetch for current sample */
+static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel)
+{
+	struct r600_bytecode_vtx vtx;
+	int r, t1;
+
+	assert(ctx->fixed_pt_position_gpr != -1);
+
+	t1 = r600_get_temp(ctx);
+
+	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+	vtx.op = FETCH_OP_VFETCH;
+	vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+	vtx.fetch_type = 2;	/* VTX_FETCH_NO_INDEX_OFFSET */
+	if (sample_id == NULL) {
+		vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
+		vtx.src_sel_x = 3;
+	}
+	else {
+		struct r600_bytecode_alu alu;
+
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
+		alu.dst.sel = t1;
+		alu.dst.write = 1;
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+
+		vtx.src_gpr = t1;
+		vtx.src_sel_x = 0;
+	}
+	vtx.mega_fetch_count = 16;
+	vtx.dst_gpr = t1;
+	vtx.dst_sel_x = 0;
+	vtx.dst_sel_y = 1;
+	vtx.dst_sel_z = 7;
+	vtx.dst_sel_w = 7;
+	vtx.data_format = FMT_32_32_32_32_FLOAT;
+	vtx.num_format_all = 2;
+	vtx.format_comp_all = 1;
+	vtx.use_const_fields = 0;
+	vtx.offset = 1; // first element is size of buffer
+	vtx.endian = r600_endian_swap(32);
+	vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+	r = r600_bytecode_add_vtx(ctx->bc, &vtx);
+	if (r)
+		return r;
+
+	return t1;
 }
 
 static void tgsi_src(struct r600_shader_ctx *ctx,
@@ -795,10 +939,22 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
 		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
 			r600_src->swizzle[0] = 2; // Z value
-			r600_src->swizzle[0] = 2;
-			r600_src->swizzle[0] = 2;
-			r600_src->swizzle[0] = 2;
+			r600_src->swizzle[1] = 2;
+			r600_src->swizzle[2] = 2;
+			r600_src->swizzle[3] = 2;
 			r600_src->sel = ctx->face_gpr;
+		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
+			r600_src->swizzle[0] = 3; // W value
+			r600_src->swizzle[1] = 3;
+			r600_src->swizzle[2] = 3;
+			r600_src->swizzle[3] = 3;
+			r600_src->sel = ctx->fixed_pt_position_gpr;
+		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
+			r600_src->swizzle[0] = 0;
+			r600_src->swizzle[1] = 1;
+			r600_src->swizzle[2] = 4;
+			r600_src->swizzle[3] = 4;
+			r600_src->sel = load_sample_position(ctx, NULL, -1);
 		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
 			r600_src->swizzle[0] = 3;
 			r600_src->swizzle[1] = 3;
@@ -811,6 +967,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 			r600_src->swizzle[2] = 0;
 			r600_src->swizzle[3] = 0;
 			r600_src->sel = 0;
+		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
+			r600_src->swizzle[0] = 3;
+			r600_src->swizzle[1] = 3;
+			r600_src->swizzle[2] = 3;
+			r600_src->swizzle[3] = 3;
+			r600_src->sel = 1;
 		}
 	} else {
 		if (tgsi_src->Register.Indirect)
@@ -1604,7 +1766,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	ctx.gs_next_vertex = 0;
 
 	ctx.face_gpr = -1;
-	ctx.has_samplemask = false;
+	ctx.fixed_pt_position_gpr = -1;
 	ctx.fragcoord_input = -1;
 	ctx.colors_used = 0;
 	ctx.clip_vertex_write = 0;
@@ -1753,6 +1915,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 			case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
 				shader->gs_max_out_vertices = property->u[0].Data;
 				break;
+			case TGSI_PROPERTY_GS_INVOCATIONS:
+				shader->gs_num_invocations = property->u[0].Data;
+				break;
 			}
 			break;
 		default:
@@ -1764,14 +1929,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	
 	shader->ring_item_size = ctx.next_ring_offset;
 
-	/* Need to tell setup to program FACE register */
-	if (ctx.has_samplemask && ctx.face_gpr != -1) {
-		i = ctx.shader->ninput++;
-		ctx.shader->input[i].name = TGSI_SEMANTIC_SAMPLEMASK;
-		ctx.shader->input[i].spi_sid = 0;
-		ctx.shader->input[i].gpr = ctx.face_gpr;
-	}
-
 	/* Process two side if needed */
 	if (shader->two_side && ctx.colors_used) {
 		int i, count = ctx.shader->ninput;
@@ -1784,6 +1941,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 		int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
 				ctx.info.file_max[TGSI_FILE_INPUT] + 1;
 
+		/* If already set, will still be enabled by other value(s) sharing same register */
 		if (ctx.face_gpr == -1) {
 			i = ctx.shader->ninput++;
 			ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
@@ -2151,6 +2309,13 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 					output[j].swizzle_y = 1;
 					output[j].swizzle_z = output[j].swizzle_w = 7;
 					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+				} else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
+					output[j].array_base = 61;
+					output[j].swizzle_x = 7;
+					output[j].swizzle_y = 7;
+					output[j].swizzle_z = 0;
+					output[j].swizzle_w = 7;
+					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
 				} else {
 					R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
 					r = -EINVAL;
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index d6db8f0..20829fd 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -33,7 +33,7 @@ struct r600_shader_io {
 	int			spi_sid;
 	unsigned		interpolate;
 	unsigned		ij_index;
-	boolean                 centroid;
+	unsigned        interpolate_location; //  TGSI_INTERPOLATE_LOC_CENTER, CENTROID, SAMPLE
 	unsigned		lds_pos; /* for evergreen */
 	unsigned		back_color_input;
 	unsigned		write_mask;
@@ -74,6 +74,7 @@ struct r600_shader {
 	unsigned		gs_input_prim;
 	unsigned		gs_output_prim;
 	unsigned		gs_max_out_vertices;
+	unsigned		gs_num_invocations;
 	/* size in bytes of a data item in the ring (single vertex data) */
 	unsigned		ring_item_size;
 
@@ -114,4 +115,8 @@ struct r600_pipe_shader {
 	unsigned		ps_depth_export;
 };
 
+/* Return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE combined with
+ * TGSI_INTERPOLATE_LOC_CENTER/CENTROID/SAMPLE. Other input values return -1. */
+int eg_get_interpolator_index(unsigned interpolate, unsigned location);
+
 #endif
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index d9342d4..bfabd6d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -486,7 +486,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 
 	sc_mode_cntl = S_028A4C_MSAA_ENABLE(state->multisample) |
 		       S_028A4C_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
-		       S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1);
+		       S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+		       S_028A4C_PS_ITER_SAMPLE(state->multisample && rctx->ps_iter_samples > 1);
 	if (rctx->b.chip_class >= R700) {
 		sc_mode_cntl |= S_028A4C_FORCE_EOV_REZ_ENABLE(1) |
 				S_028A4C_R700_ZMM_LINE_OFFSET(1) |
@@ -1525,6 +1526,19 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 	r600_emit_msaa_state(rctx, rctx->framebuffer.nr_samples);
 }
 
+static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	/* Store min_samples in rctx->ps_iter_samples; nothing to do when the value is unchanged. */
+	if (rctx->ps_iter_samples == min_samples)
+		return;
+	/* NOTE(review): PS_ITER_SAMPLE is computed in r600_create_rs_state; confirm dirtying the atom picks up the new value. */
+	rctx->ps_iter_samples = min_samples;
+	if (rctx->framebuffer.nr_samples > 1) { /* only flag the rasterizer atom when the framebuffer has more than one sample */
+		rctx->rasterizer_state.atom.dirty = true;
+	}
+}
+
 static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
@@ -2411,10 +2425,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	struct r600_command_buffer *cb = &shader->command_buffer;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
-	int pos_index = -1, face_index = -1;
+	int pos_index = -1, face_index = -1, fixed_pt_position_index = -1;
 	unsigned tmp, sid, ufi = 0;
 	int need_linear = 0;
-	unsigned z_export = 0, stencil_export = 0;
+	unsigned z_export = 0, stencil_export = 0, mask_export = 0;
 	unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
 
 	if (!cb->buf) {
@@ -2427,8 +2441,11 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	for (i = 0; i < rshader->ninput; i++) {
 		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
 			pos_index = i;
-		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
+		if (rshader->input[i].name == TGSI_SEMANTIC_FACE && face_index == -1)
 			face_index = i;
+		if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID && fixed_pt_position_index == -1) {
+			fixed_pt_position_index = i;
+		}
 
 		sid = rshader->input[i].spi_sid;
 
@@ -2445,9 +2462,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 			tmp |= S_028644_PT_SPRITE_TEX(1);
 		}
 
-		if (rshader->input[i].centroid)
+		if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID)
 			tmp |= S_028644_SEL_CENTROID(1);
 
+		if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE)
+			tmp |= S_028644_SEL_SAMPLE(1);
+
 		if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) {
 			need_linear = 1;
 			tmp |= S_028644_SEL_LINEAR(1);
@@ -2462,9 +2482,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 			z_export = 1;
 		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
 			stencil_export = 1;
+		if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK)
+			mask_export = 1;
 	}
 	db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
 	db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(stencil_export);
+	db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
 	if (rshader->uses_kill)
 		db_shader_control |= S_02880C_KILL_ENABLE(1);
 
@@ -2490,9 +2513,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	spi_input_z = 0;
 	if (pos_index != -1) {
 		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
-					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) |
 					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
-					S_0286CC_BARYC_SAMPLE_CNTL(1));
+					S_0286CC_BARYC_SAMPLE_CNTL(1)) |
+					S_0286CC_POSITION_SAMPLE(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE);
 		spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
 	}
 
@@ -2501,6 +2525,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
 			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
 	}
+	if (fixed_pt_position_index != -1) {
+		spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) |
+			S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr);
+	}
 
 	/* HW bug in original R600 */
 	if (rctx->b.family == CHIP_R600)
@@ -3039,6 +3067,7 @@ void r600_init_state_functions(struct r600_context *rctx)
 	rctx->b.b.create_sampler_view = r600_create_sampler_view;
 	rctx->b.b.set_framebuffer_state = r600_set_framebuffer_state;
 	rctx->b.b.set_polygon_stipple = r600_set_polygon_stipple;
+	rctx->b.b.set_min_samples = r600_set_min_samples;
 	rctx->b.b.set_scissor_states = r600_set_scissor_states;
 	rctx->b.b.get_sample_position = r600_get_sample_position;
 	rctx->b.dma_copy = r600_dma_copy;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 17568ab..e2cb5e9 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -841,6 +841,9 @@
 #define   S_02880C_KILL_ENABLE(x)                      (((x) & 0x1) << 6)
 #define   G_02880C_KILL_ENABLE(x)                      (((x) >> 6) & 0x1)
 #define   C_02880C_KILL_ENABLE                         0xFFFFFFBF
+#define   S_02880C_MASK_EXPORT_ENABLE(x)               (((x) & 0x1) << 8)
+#define   G_02880C_MASK_EXPORT_ENABLE(x)               (((x) >> 8) & 0x1)
+#define   C_02880C_MASK_EXPORT_ENABLE                  0xFFFFFEFF
 #define   S_02880C_DUAL_EXPORT_ENABLE(x)               (((x) & 0x1) << 9)
 #define   G_02880C_DUAL_EXPORT_ENABLE(x)               (((x) >> 9) & 0x1)
 #define   C_02880C_DUAL_EXPORT_ENABLE                  0xFFFFFDFF
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 346ccc9..d787e5b 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -147,25 +147,28 @@ int bc_parser::parse_decls() {
 	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
 			&& sh->target == TARGET_PS;
 
-	unsigned linear = 0, persp = 0, centroid = 1;
+	bool ij_interpolators[6];
+	memset(ij_interpolators, 0, sizeof(ij_interpolators));
 
 	for (unsigned i = 0; i < pshader->ninput; ++i) {
 		r600_shader_io & in = pshader->input[i];
 		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
 		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
 		if (ps_interp && in.spi_sid) {
-			if (in.interpolate == TGSI_INTERPOLATE_LINEAR ||
-					in.interpolate == TGSI_INTERPOLATE_COLOR)
-				linear = 1;
-			else if (in.interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
-				persp = 1;
-			if (in.centroid)
-				centroid = 2;
+			int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
+			if (k >= 0)
+				ij_interpolators[k] |= true;
 		}
 	}
 
 	if (ps_interp) {
-		unsigned mask = (1 << (2 * (linear + persp) * centroid)) - 1;
+		/* add the egcm ij interpolators to live inputs */
+		unsigned num_ij = 0;
+		for (unsigned i = 0; i < Elements(ij_interpolators); i++) {
+			num_ij += ij_interpolators[i];
+		}
+
+		unsigned mask = (1 << (2 * num_ij)) - 1;
 		unsigned gpr = 0;
 
 		while (mask) {
-- 
1.9.1



More information about the mesa-dev mailing list