[Mesa-dev] [PATCH v3] r600g: Implement GL_ARB_sample_shading
Marek Olšák
maraeo at gmail.com
Sun Oct 12 14:55:59 PDT 2014
Pushed, thanks.
Marek
On Wed, Sep 10, 2014 at 11:54 AM, Glenn Kennard <glenn.kennard at gmail.com> wrote:
> Also fixes two sided lighting which was broken at least
> on pre-evergreen by commit b1eb00.
>
> Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
> ---
> Changes since patch v2:
> Added workarounds for known hardware issues on R600 and RV770 when
> sample shading is used together with hyperz, thanks Marek.
>
> Changes since patch v1:
> Factor out and set sample positions also for pre-evergreen
> Misc r600 breakage fixes
> Some cleanup
>
> Passes piglit without regressions on radeon 6670 and RV770.
>
> R600/R700 fail the ignore-centroid-qualifier and interpolate-at-sample-position
> piglit test cases; I believe that is due to the tests requiring more
> interpolation precision than typical DX10 feature level hardware has: 11
> bits for DX11 hardware vs 8 for DX10, if I remember correctly.
>
> docs/GL3.txt | 2 +-
> docs/relnotes/10.4.html | 62 ++++++
> src/gallium/drivers/r600/evergreen_state.c | 85 +++++---
> src/gallium/drivers/r600/evergreend.h | 3 +
> src/gallium/drivers/r600/r600_pipe.c | 2 +-
> src/gallium/drivers/r600/r600_pipe.h | 10 +
> src/gallium/drivers/r600/r600_shader.c | 292 ++++++++++++++++++++-------
> src/gallium/drivers/r600/r600_shader.h | 6 +-
> src/gallium/drivers/r600/r600_state.c | 60 +++++-
> src/gallium/drivers/r600/r600_state_common.c | 20 ++
> src/gallium/drivers/r600/r600d.h | 3 +
> src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 21 +-
> 12 files changed, 446 insertions(+), 120 deletions(-)
> create mode 100644 docs/relnotes/10.4.html
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index f02ad67..372368f 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -110,7 +110,7 @@ GL 4.0, GLSL 4.00:
> - Interpolation functions DONE ()
> - New overload resolution rules DONE
> GL_ARB_gpu_shader_fp64 started (Dave)
> - GL_ARB_sample_shading DONE (i965, nv50, nvc0, radeonsi)
> + GL_ARB_sample_shading DONE (i965, nv50, nvc0, r600, radeonsi)
> GL_ARB_shader_subroutine not started
> GL_ARB_tessellation_shader started (Chris, Ilia)
> GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
> diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html
> new file mode 100644
> index 0000000..d56275d
> --- /dev/null
> +++ b/docs/relnotes/10.4.html
> @@ -0,0 +1,62 @@
> +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
> +<html lang="en">
> +<head>
> + <meta http-equiv="content-type" content="text/html; charset=utf-8">
> + <title>Mesa Release Notes</title>
> + <link rel="stylesheet" type="text/css" href="../mesa.css">
> +</head>
> +<body>
> +
> +<div class="header">
> + <h1>The Mesa 3D Graphics Library</h1>
> +</div>
> +
> +<iframe src="../contents.html"></iframe>
> +<div class="content">
> +
> +<h1>Mesa 10.4 Release Notes / TBD</h1>
> +
> +<p>
> +Mesa 10.4 is a new development release.
> +People who are concerned with stability and reliability should stick
> +with a previous release or wait for Mesa 10.4.1.
> +</p>
> +<p>
> +Mesa 10.4 implements the OpenGL 3.3 API, but the version reported by
> +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
> +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
> +Some drivers don't support all the features required in OpenGL 3.3. OpenGL
> +3.3 is <strong>only</strong> available if requested at context creation
> +because compatibility contexts are not supported.
> +</p>
> +
> +
> +<h2>MD5 checksums</h2>
> +<pre>
> +TBD.
> +</pre>
> +
> +
> +<h2>New features</h2>
> +
> +<p>
> +Note: some of the new features are only available with certain drivers.
> +</p>
> +
> +<ul>
> +<li>GL_ARB_sample_shading on r600</li>
> +</ul>
> +
> +
> +<h2>Bug fixes</h2>
> +
> +TBD.
> +
> +<h2>Changes</h2>
> +
> +<ul>
> +</ul>
> +
> +</div>
> +</body>
> +</html>
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index e7faeaf..e0adb1e 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1400,7 +1400,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>
> /* MSAA. */
> if (rctx->b.chip_class == EVERGREEN)
> - rctx->framebuffer.atom.num_dw += 14; /* Evergreen */
> + rctx->framebuffer.atom.num_dw += 17; /* Evergreen */
> else
> rctx->framebuffer.atom.num_dw += 28; /* Cayman */
>
> @@ -1420,8 +1420,22 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
> }
>
> rctx->framebuffer.atom.dirty = true;
> +
> + r600_set_sample_locations_constant_buffer(rctx);
> }
>
> +static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
> +{
> + struct r600_context *rctx = (struct r600_context *)ctx;
> +
> + if (rctx->ps_iter_samples == min_samples)
> + return;
> +
> + rctx->ps_iter_samples = min_samples;
> + if (rctx->framebuffer.nr_samples > 1) {
> + rctx->framebuffer.atom.dirty = true;
> + }
> +}
>
> /* 8xMSAA */
> static uint32_t sample_locs_8x[] = {
> @@ -1475,7 +1489,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx,
> }
> }
>
> -static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
> +static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
> {
>
> struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
> @@ -1508,10 +1522,12 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
> S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
> radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
> S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
> + r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
> } else {
> r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
> radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
> radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
> + r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
> }
> }
>
> @@ -1672,10 +1688,10 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
> radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
>
> if (rctx->b.chip_class == EVERGREEN) {
> - evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples);
> + evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples);
> } else {
> cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples);
> - cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, 1);
> + cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, rctx->ps_iter_samples);
> }
> }
>
> @@ -2432,8 +2448,6 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
> r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
> }
>
> - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
> -
> /* The cs checker requires this register to be set. */
> r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
>
> @@ -2786,11 +2800,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
> struct r600_command_buffer *cb = &shader->command_buffer;
> struct r600_shader *rshader = &shader->shader;
> unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0;
> - int pos_index = -1, face_index = -1;
> + int pos_index = -1, face_index = -1, fixed_pt_position_index = -1;
> int ninterp = 0;
> - boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
> - unsigned spi_baryc_cntl, sid, tmp, num = 0;
> - unsigned z_export = 0, stencil_export = 0;
> + boolean have_perspective = FALSE, have_linear = FALSE;
> + static const unsigned spi_baryc_enable_bit[6] = {
> + S_0286E0_PERSP_SAMPLE_ENA(1),
> + S_0286E0_PERSP_CENTER_ENA(1),
> + S_0286E0_PERSP_CENTROID_ENA(1),
> + S_0286E0_LINEAR_SAMPLE_ENA(1),
> + S_0286E0_LINEAR_CENTER_ENA(1),
> + S_0286E0_LINEAR_CENTROID_ENA(1)
> + };
> + unsigned spi_baryc_cntl = 0, sid, tmp, num = 0;
> + unsigned z_export = 0, stencil_export = 0, mask_export = 0;
> unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
> uint32_t spi_ps_input_cntl[32];
>
> @@ -2813,14 +2835,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
> if (face_index == -1)
> face_index = i; /* lives in same register, same enable bit */
> }
> + else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) {
> + fixed_pt_position_index = i;
> + }
> else {
> ninterp++;
> - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
> - have_linear = TRUE;
> - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
> - have_perspective = TRUE;
> - if (rshader->input[i].centroid)
> - have_centroid = TRUE;
> + int k = eg_get_interpolator_index(
> + rshader->input[i].interpolate,
> + rshader->input[i].interpolate_location);
> + if (k >= 0) {
> + spi_baryc_cntl |= spi_baryc_enable_bit[k];
> + have_perspective |= k < 3;
> + have_linear |= !(k < 3);
> + }
> }
>
> sid = rshader->input[i].spi_sid;
> @@ -2852,17 +2879,22 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
> z_export = 1;
> if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
> stencil_export = 1;
> + if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK &&
> + rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0)
> + mask_export = 1;
> }
> if (rshader->uses_kill)
> db_shader_control |= S_02880C_KILL_ENABLE(1);
>
> db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
> db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
> + db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
>
> exports_ps = 0;
> for (i = 0; i < rshader->noutput; i++) {
> if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
> - rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
> + rshader->output[i].name == TGSI_SEMANTIC_STENCIL ||
> + rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK)
> exports_ps |= 1;
> }
>
> @@ -2878,6 +2910,8 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
> ninterp = 1;
> have_perspective = TRUE;
> }
> + if (!spi_baryc_cntl)
> + spi_baryc_cntl |= spi_baryc_enable_bit[0];
>
> if (!have_perspective && !have_linear)
> have_perspective = TRUE;
> @@ -2888,7 +2922,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
> spi_input_z = 0;
> if (pos_index != -1) {
> spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) |
> - S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
> + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) |
> S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
> spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
> }
> @@ -2898,14 +2932,10 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
> spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
> S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
> }
> -
> - spi_baryc_cntl = 0;
> - if (have_perspective)
> - spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) |
> - S_0286E0_PERSP_CENTROID_ENA(have_centroid);
> - if (have_linear)
> - spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) |
> - S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
> + if (fixed_pt_position_index != -1) {
> + spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) |
> + S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr);
> + }
>
> r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
> r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
> @@ -2924,7 +2954,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
> /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
>
> shader->db_shader_control = db_shader_control;
> - shader->ps_depth_export = z_export | stencil_export;
> + shader->ps_depth_export = z_export | stencil_export | mask_export;
>
> shader->sprite_coord_enable = sprite_coord_enable;
> if (rctx->rasterizer)
> @@ -3446,6 +3476,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
> rctx->b.b.create_sampler_view = evergreen_create_sampler_view;
> rctx->b.b.set_framebuffer_state = evergreen_set_framebuffer_state;
> rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple;
> + rctx->b.b.set_min_samples = evergreen_set_min_samples;
> rctx->b.b.set_scissor_states = evergreen_set_scissor_states;
>
> if (rctx->b.chip_class == EVERGREEN)
> diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
> index 784d495..4989996 100644
> --- a/src/gallium/drivers/r600/evergreend.h
> +++ b/src/gallium/drivers/r600/evergreend.h
> @@ -803,6 +803,9 @@
> #define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6)
> #define G_02880C_KILL_ENABLE(x) (((x) >> 6) & 0x1)
> #define C_02880C_KILL_ENABLE 0xFFFFFFBF
> +#define S_02880C_MASK_EXPORT_ENABLE(x) (((x) & 0x1) << 8)
> +#define G_02880C_MASK_EXPORT_ENABLE(x) (((x) >> 8) & 0x1)
> +#define C_02880C_MASK_EXPORT_ENABLE 0XFFFFFEFF
> #define S_02880C_DUAL_EXPORT_ENABLE(x) (((x) & 0x1) << 9)
> #define G_02880C_DUAL_EXPORT_ENABLE(x) (((x) >> 9) & 0x1)
> #define C_02880C_DUAL_EXPORT_ENABLE 0xFFFFFDFF
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index 6e00eff..0608a17 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -262,6 +262,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
> case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
> case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
> + case PIPE_CAP_SAMPLE_SHADING:
> return 1;
>
> case PIPE_CAP_COMPUTE:
> @@ -316,7 +317,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
> case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
> case PIPE_CAP_VERTEX_COLOR_CLAMPED:
> case PIPE_CAP_USER_VERTEX_BUFFERS:
> - case PIPE_CAP_SAMPLE_SHADING:
> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
> case PIPE_CAP_DRAW_INDIRECT:
> case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 8907d41..12019c0 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -52,6 +52,14 @@
> #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
> #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
> #define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
> +/* Currently R600_MAX_CONST_BUFFERS is too large, the hardware only has 16 buffers, but the driver is
> + * trying to use 17. Avoid accidentally aliasing with user UBOs for SAMPLE_POSITIONS by using an id<16.
> + * UCP/SAMPLE_POSITIONS are never accessed by the same shader stage so they can use the same id.
> + *
> + * Fixing this properly would require the driver to combine its buffers into a single hardware buffer,
> + * which would also allow supporting the d3d 11 mandated minimum of 15 user const buffers.
> + */
> +#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
>
> #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
>
> @@ -452,6 +460,7 @@ struct r600_context {
> bool force_blend_disable;
> boolean dual_src_blend;
> unsigned zwritemask;
> + int ps_iter_samples;
>
> /* Index buffer. */
> struct pipe_index_buffer index_buffer;
> @@ -632,6 +641,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
> void r600_sampler_states_dirty(struct r600_context *rctx,
> struct r600_sampler_states *state);
> void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
> +void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
> uint32_t r600_translate_stencil_op(int s_op);
> uint32_t r600_translate_fill(uint32_t func);
> unsigned r600_tex_wrap(unsigned wrap);
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 9f10c20..9e9a557 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -64,6 +64,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
> struct r600_pipe_shader *pipeshader,
> struct r600_shader_key key);
>
> +
> static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
> int size, unsigned comp_mask) {
>
> @@ -267,6 +268,11 @@ struct r600_shader_src {
> uint32_t value[4];
> };
>
> +struct eg_interp {
> + boolean enabled;
> + unsigned ij_index;
> +};
> +
> struct r600_shader_ctx {
> struct tgsi_shader_info info;
> struct tgsi_parse_context parse;
> @@ -283,13 +289,11 @@ struct r600_shader_ctx {
> uint32_t max_driver_temp_used;
> boolean use_llvm;
> /* needed for evergreen interpolation */
> - boolean input_centroid;
> - boolean input_linear;
> - boolean input_perspective;
> - int num_interp_gpr;
> + struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
> /* evergreen/cayman also store sample mask in face register */
> int face_gpr;
> - boolean has_samplemask;
> + /* sample id is .w component stored in fixed point position register */
> + int fixed_pt_position_gpr;
> int colors_used;
> boolean clip_vertex_write;
> unsigned cv_output;
> @@ -320,6 +324,12 @@ static int tgsi_endif(struct r600_shader_ctx *ctx);
> static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
> static int tgsi_endloop(struct r600_shader_ctx *ctx);
> static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
> +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
> + unsigned int cb_idx, unsigned int offset, unsigned ar_chan,
> + unsigned int dst_reg);
> +static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
> + const struct r600_shader_src *shader_src,
> + unsigned chan);
>
> static int tgsi_is_supported(struct r600_shader_ctx *ctx)
> {
> @@ -364,27 +374,41 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
> return 0;
> }
>
> -static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
> - int input)
> +int eg_get_interpolator_index(unsigned interpolate, unsigned location)
> {
> - int ij_index = 0;
> + if (interpolate == TGSI_INTERPOLATE_COLOR ||
> + interpolate == TGSI_INTERPOLATE_LINEAR ||
> + interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
> + {
> + int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
> + int loc;
>
> - if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
> - if (ctx->shader->input[input].centroid)
> - ij_index++;
> - } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
> - /* if we have perspective add one */
> - if (ctx->input_perspective) {
> - ij_index++;
> - /* if we have perspective centroid */
> - if (ctx->input_centroid)
> - ij_index++;
> + switch(location) {
> + case TGSI_INTERPOLATE_LOC_CENTER:
> + loc = 1;
> + break;
> + case TGSI_INTERPOLATE_LOC_CENTROID:
> + loc = 2;
> + break;
> + case TGSI_INTERPOLATE_LOC_SAMPLE:
> + default:
> + loc = 0; break;
> }
> - if (ctx->shader->input[input].centroid)
> - ij_index++;
> +
> + return is_linear * 3 + loc;
> }
>
> - ctx->shader->input[input].ij_index = ij_index;
> + return -1;
> +}
> +
> +static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
> + int input)
> +{
> + int i = eg_get_interpolator_index(
> + ctx->shader->input[input].interpolate,
> + ctx->shader->input[input].interpolate_location);
> + assert(i >= 0);
> + ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
> }
>
> static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
> @@ -582,13 +606,15 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
> ctx->shader->input[i].name = d->Semantic.Name;
> ctx->shader->input[i].sid = d->Semantic.Index;
> ctx->shader->input[i].interpolate = d->Interp.Interpolate;
> - ctx->shader->input[i].centroid = d->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID;
> + ctx->shader->input[i].interpolate_location = d->Interp.Location;
> ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
> if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
> switch (ctx->shader->input[i].name) {
> case TGSI_SEMANTIC_FACE:
> - if (ctx->face_gpr == -1)
> + if (ctx->face_gpr != -1)
> + ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
> + else
> ctx->face_gpr = ctx->shader->input[i].gpr;
> break;
> case TGSI_SEMANTIC_COLOR:
> @@ -679,14 +705,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
> break;
>
> case TGSI_FILE_SYSTEM_VALUE:
> - if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK) {
> - ctx->has_samplemask = true;
> - /* lives in Front Face GPR */
> - if (ctx->face_gpr == -1)
> - ctx->face_gpr = ctx->file_offset[TGSI_FILE_SYSTEM_VALUE] + d->Range.First;
> - break;
> - }
> - else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
> + if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
> + d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
> + d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
> + break; /* Already handled from allocate_system_value_inputs */
> + } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
> if (!ctx->native_integers) {
> struct r600_bytecode_alu alu;
> memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> @@ -720,12 +743,69 @@ static int r600_get_temp(struct r600_shader_ctx *ctx)
> return ctx->temp_reg + ctx->max_driver_temp_used++;
> }
>
> +static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
> +{
> + struct tgsi_parse_context parse;
> + struct {
> + boolean enabled;
> + int *reg;
> + unsigned name, alternate_name;
> + } inputs[2] = {
> + { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */
> +
> + { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
> + };
> + int i, k, num_regs = 0;
> +
> + if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
> + return 0;
> + }
> +
> + while (!tgsi_parse_end_of_tokens(&parse)) {
> + tgsi_parse_token(&parse);
> +
> + if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
> + struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
> + if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
> + for (k = 0; k < Elements(inputs); k++) {
> + if (d->Semantic.Name == inputs[k].name ||
> + d->Semantic.Name == inputs[k].alternate_name) {
> + inputs[k].enabled = true;
> + }
> + }
> + }
> + }
> + }
> +
> + tgsi_parse_free(&parse);
> +
> + for (i = 0; i < Elements(inputs); i++) {
> + boolean enabled = inputs[i].enabled;
> + int *reg = inputs[i].reg;
> + unsigned name = inputs[i].name;
> +
> + if (enabled) {
> + int gpr = gpr_offset + num_regs++;
> +
> + // add to inputs, allocate a gpr
> + k = ctx->shader->ninput ++;
> + ctx->shader->input[k].name = name;
> + ctx->shader->input[k].sid = 0;
> + ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
> + ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
> + *reg = ctx->shader->input[k].gpr = gpr;
> + }
> + }
> +
> + return gpr_offset + num_regs;
> +}
> +
> /*
> * for evergreen we need to scan the shader to find the number of GPRs we need to
> - * reserve for interpolation.
> + * reserve for interpolation and system values
> *
> * we need to know if we are going to emit
> - * any centroid inputs
> + * any sample or centroid inputs
> * if perspective and linear are required
> */
> static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
> @@ -733,39 +813,92 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
> int i;
> int num_baryc;
>
> - ctx->input_linear = FALSE;
> - ctx->input_perspective = FALSE;
> - ctx->input_centroid = FALSE;
> - ctx->num_interp_gpr = 1;
> + memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));
>
> - /* any centroid inputs */
> for (i = 0; i < ctx->info.num_inputs; i++) {
> - /* skip position/face */
> + int k;
> + /* skip position/face/mask/sampleid */
> if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
> ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE ||
> - ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK)
> + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK ||
> + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID)
> continue;
> - if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
> - ctx->input_linear = TRUE;
> - if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
> - ctx->input_perspective = TRUE;
> - if (ctx->info.input_interpolate_loc[i] == TGSI_INTERPOLATE_LOC_CENTROID)
> - ctx->input_centroid = TRUE;
> +
> + k = eg_get_interpolator_index(
> + ctx->info.input_interpolate[i],
> + ctx->info.input_interpolate_loc[i]);
> + if (k >= 0)
> + ctx->eg_interpolators[k].enabled = TRUE;
> }
>
> + /* assign gpr to each interpolator according to priority */
> num_baryc = 0;
> - /* ignoring sample for now */
> - if (ctx->input_perspective)
> - num_baryc++;
> - if (ctx->input_linear)
> - num_baryc++;
> - if (ctx->input_centroid)
> - num_baryc *= 2;
> -
> - ctx->num_interp_gpr += (num_baryc + 1) >> 1;
> -
> - /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
> - return ctx->num_interp_gpr;
> + for (i = 0; i < Elements(ctx->eg_interpolators); i++) {
> + if (ctx->eg_interpolators[i].enabled) {
> + ctx->eg_interpolators[i].ij_index = num_baryc;
> + num_baryc ++;
> + }
> + }
> +
> + /* XXX PULL MODEL and LINE STIPPLE */
> +
> + num_baryc = (num_baryc + 1) >> 1;
> + return allocate_system_value_inputs(ctx, num_baryc);
> +}
> +
> +/* sample_id == NULL means fetch for current sample */
> +static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel)
> +{
> + struct r600_bytecode_vtx vtx;
> + int r, t1;
> +
> + assert(ctx->fixed_pt_position_gpr != -1);
> +
> + t1 = r600_get_temp(ctx);
> +
> + memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
> + vtx.op = FETCH_OP_VFETCH;
> + vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
> + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
> + if (sample_id == NULL) {
> + vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
> + vtx.src_sel_x = 3;
> + }
> + else {
> + struct r600_bytecode_alu alu;
> +
> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP1_MOV;
> + r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
> + alu.dst.sel = t1;
> + alu.dst.write = 1;
> + alu.last = 1;
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
> +
> + vtx.src_gpr = t1;
> + vtx.src_sel_x = 0;
> + }
> + vtx.mega_fetch_count = 16;
> + vtx.dst_gpr = t1;
> + vtx.dst_sel_x = 0;
> + vtx.dst_sel_y = 1;
> + vtx.dst_sel_z = 7;
> + vtx.dst_sel_w = 7;
> + vtx.data_format = FMT_32_32_32_32_FLOAT;
> + vtx.num_format_all = 2;
> + vtx.format_comp_all = 1;
> + vtx.use_const_fields = 0;
> + vtx.offset = 1; // first element is size of buffer
> + vtx.endian = r600_endian_swap(32);
> + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
> +
> + r = r600_bytecode_add_vtx(ctx->bc, &vtx);
> + if (r)
> + return r;
> +
> + return t1;
> }
>
> static void tgsi_src(struct r600_shader_ctx *ctx,
> @@ -797,10 +930,22 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
> } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
> if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
> r600_src->swizzle[0] = 2; // Z value
> - r600_src->swizzle[0] = 2;
> - r600_src->swizzle[0] = 2;
> - r600_src->swizzle[0] = 2;
> + r600_src->swizzle[1] = 2;
> + r600_src->swizzle[2] = 2;
> + r600_src->swizzle[3] = 2;
> r600_src->sel = ctx->face_gpr;
> + } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
> + r600_src->swizzle[0] = 3; // W value
> + r600_src->swizzle[1] = 3;
> + r600_src->swizzle[2] = 3;
> + r600_src->swizzle[3] = 3;
> + r600_src->sel = ctx->fixed_pt_position_gpr;
> + } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
> + r600_src->swizzle[0] = 0;
> + r600_src->swizzle[1] = 1;
> + r600_src->swizzle[2] = 4;
> + r600_src->swizzle[3] = 4;
> + r600_src->sel = load_sample_position(ctx, NULL, -1);
> } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
> r600_src->swizzle[0] = 3;
> r600_src->swizzle[1] = 3;
> @@ -1612,7 +1757,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
> ctx.gs_next_vertex = 0;
>
> ctx.face_gpr = -1;
> - ctx.has_samplemask = false;
> + ctx.fixed_pt_position_gpr = -1;
> ctx.fragcoord_input = -1;
> ctx.colors_used = 0;
> ctx.clip_vertex_write = 0;
> @@ -1661,8 +1806,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
> r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
> }
> }
> - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
> - ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
> + if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
> + if (ctx.bc->chip_class >= EVERGREEN)
> + ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
> + else
> + ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
> }
> if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
> /* FIXME 1 would be enough in some cases (3 or less input vertices) */
> @@ -1775,14 +1923,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>
> shader->ring_item_size = ctx.next_ring_offset;
>
> - /* Need to tell setup to program FACE register */
> - if (ctx.has_samplemask && ctx.face_gpr != -1) {
> - i = ctx.shader->ninput++;
> - ctx.shader->input[i].name = TGSI_SEMANTIC_SAMPLEMASK;
> - ctx.shader->input[i].spi_sid = 0;
> - ctx.shader->input[i].gpr = ctx.face_gpr;
> - }
> -
> /* Process two side if needed */
> if (shader->two_side && ctx.colors_used) {
> int i, count = ctx.shader->ninput;
> @@ -1795,6 +1935,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
> int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
> ctx.info.file_max[TGSI_FILE_INPUT] + 1;
>
> + /* if two sided and neither face nor sample mask is used by shader, ensure face_gpr is emitted */
> if (ctx.face_gpr == -1) {
> i = ctx.shader->ninput++;
> ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
> @@ -2162,6 +2303,13 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
> output[j].swizzle_y = 1;
> output[j].swizzle_z = output[j].swizzle_w = 7;
> output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> + } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
> + output[j].array_base = 61;
> + output[j].swizzle_x = 7;
> + output[j].swizzle_y = 7;
> + output[j].swizzle_z = 0;
> + output[j].swizzle_w = 7;
> + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> } else {
> R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
> r = -EINVAL;
> diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
> index 4b27ede..20829fd 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -33,7 +33,7 @@ struct r600_shader_io {
> int spi_sid;
> unsigned interpolate;
> unsigned ij_index;
> - boolean centroid;
> + unsigned interpolate_location; // TGSI_INTERPOLATE_LOC_CENTER, CENTROID, SAMPLE
> unsigned lds_pos; /* for evergreen */
> unsigned back_color_input;
> unsigned write_mask;
> @@ -115,4 +115,8 @@ struct r600_pipe_shader {
> unsigned ps_depth_export;
> };
>
> +/* return the table index 0-5 for TGSI_INTERPOLATE_COLOR/LINEAR/PERSPECTIVE and
> + TGSI_INTERPOLATE_LOC_SAMPLE/CENTER/CENTROID. Other interpolate modes return -1. */
> +int eg_get_interpolator_index(unsigned interpolate, unsigned location);
> +
> #endif
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index 36f7750..e034710 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -486,7 +486,12 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
>
> sc_mode_cntl = S_028A4C_MSAA_ENABLE(state->multisample) |
> S_028A4C_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
> - S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1);
> + S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
> + S_028A4C_PS_ITER_SAMPLE(state->multisample && rctx->ps_iter_samples > 1);
> + if (rctx->b.family == CHIP_RV770) {
> + /* work around possible rendering corruption on RV770 when hyperz is used together with sample shading */
> + sc_mode_cntl |= S_028A4C_TILE_COVER_DISABLE(state->multisample && rctx->ps_iter_samples > 1);
> + }
> if (rctx->b.chip_class >= R700) {
> sc_mode_cntl |= S_028A4C_FORCE_EOV_REZ_ENABLE(1) |
> S_028A4C_R700_ZMM_LINE_OFFSET(1) |
> @@ -1245,6 +1250,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
> }
>
> rctx->framebuffer.atom.dirty = true;
> +
> + r600_set_sample_locations_constant_buffer(rctx);
> }
>
> static uint32_t sample_locs_2x[] = {
> @@ -1524,6 +1531,21 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
> r600_emit_msaa_state(rctx, rctx->framebuffer.nr_samples);
> }
>
> +static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
> +{
> + struct r600_context *rctx = (struct r600_context *)ctx;
> +
> + if (rctx->ps_iter_samples == min_samples)
> + return;
> +
> + rctx->ps_iter_samples = min_samples;
> + if (rctx->framebuffer.nr_samples > 1) {
> + rctx->rasterizer_state.atom.dirty = true;
> + if (rctx->b.chip_class == R600)
> + rctx->db_misc_state.atom.dirty = true;
> + }
> +}
> +
> static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
> {
> struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
> @@ -1603,6 +1625,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
> } else {
> db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
> }
> + if (rctx->b.chip_class == R600 && rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) {
> + /* sample shading together with hyperz causes lockups on R6xx chips */
> + db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
> + }
> if (a->flush_depthstencil_through_cb) {
> assert(a->copy_depth || a->copy_stencil);
>
> @@ -2418,10 +2444,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> struct r600_command_buffer *cb = &shader->command_buffer;
> struct r600_shader *rshader = &shader->shader;
> unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
> - int pos_index = -1, face_index = -1;
> + int pos_index = -1, face_index = -1, fixed_pt_position_index = -1;
> unsigned tmp, sid, ufi = 0;
> int need_linear = 0;
> - unsigned z_export = 0, stencil_export = 0;
> + unsigned z_export = 0, stencil_export = 0, mask_export = 0;
> unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
>
> if (!cb->buf) {
> @@ -2434,8 +2460,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> for (i = 0; i < rshader->ninput; i++) {
> if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
> pos_index = i;
> - if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
> + if (rshader->input[i].name == TGSI_SEMANTIC_FACE && face_index == -1)
> face_index = i;
> + if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID)
> + fixed_pt_position_index = i;
>
> sid = rshader->input[i].spi_sid;
>
> @@ -2452,9 +2480,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> tmp |= S_028644_PT_SPRITE_TEX(1);
> }
>
> - if (rshader->input[i].centroid)
> + if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID)
> tmp |= S_028644_SEL_CENTROID(1);
>
> + if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE)
> + tmp |= S_028644_SEL_SAMPLE(1);
> +
> if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) {
> need_linear = 1;
> tmp |= S_028644_SEL_LINEAR(1);
> @@ -2469,16 +2500,21 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> z_export = 1;
> if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
> stencil_export = 1;
> + if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK &&
> + rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0)
> + mask_export = 1;
> }
> db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
> db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(stencil_export);
> + db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
> if (rshader->uses_kill)
> db_shader_control |= S_02880C_KILL_ENABLE(1);
>
> exports_ps = 0;
> for (i = 0; i < rshader->noutput; i++) {
> if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
> - rshader->output[i].name == TGSI_SEMANTIC_STENCIL) {
> + rshader->output[i].name == TGSI_SEMANTIC_STENCIL ||
> + rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
> exports_ps |= 1;
> }
> }
> @@ -2497,9 +2533,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> spi_input_z = 0;
> if (pos_index != -1) {
> spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
> - S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
> + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) |
> S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
> - S_0286CC_BARYC_SAMPLE_CNTL(1));
> + S_0286CC_BARYC_SAMPLE_CNTL(1)) |
> + S_0286CC_POSITION_SAMPLE(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE);
> spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
> }
>
> @@ -2508,6 +2545,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
> S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
> }
> + if (fixed_pt_position_index != -1) {
> + spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) |
> + S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr);
> + }
>
> /* HW bug in original R600 */
> if (rctx->b.family == CHIP_R600)
> @@ -2531,7 +2572,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
>
> /* only set some bits here, the other bits are set in the dsa state */
> shader->db_shader_control = db_shader_control;
> - shader->ps_depth_export = z_export | stencil_export;
> + shader->ps_depth_export = z_export | stencil_export | mask_export;
>
> shader->sprite_coord_enable = sprite_coord_enable;
> if (rctx->rasterizer)
> @@ -3046,6 +3087,7 @@ void r600_init_state_functions(struct r600_context *rctx)
> rctx->b.b.create_sampler_view = r600_create_sampler_view;
> rctx->b.b.set_framebuffer_state = r600_set_framebuffer_state;
> rctx->b.b.set_polygon_stipple = r600_set_polygon_stipple;
> + rctx->b.b.set_min_samples = r600_set_min_samples;
> rctx->b.b.set_scissor_states = r600_set_scissor_states;
> rctx->b.b.get_sample_position = r600_get_sample_position;
> rctx->b.dma_copy = r600_dma_copy;
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index d9174a5..68365f9 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -1085,6 +1085,26 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s
> pipe_resource_reference(&cb.buffer, NULL);
> }
>
> +/* set sample xy locations as array of fragment shader constants */
> +void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
> +{
> + struct pipe_constant_buffer constbuf = {0};
> + float values[4*16] = {0.0f};
> + int i;
> + struct pipe_context *ctx = &rctx->b.b;
> +
> + assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
> + for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
> + ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
> + }
> +
> + constbuf.user_buffer = values;
> + constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
> + ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
> + R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
> + pipe_resource_reference(&constbuf.buffer, NULL);
> +}
> +
> static void update_shader_atom(struct pipe_context *ctx,
> struct r600_shader_state *state,
> struct r600_pipe_shader *shader)
> diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
> index 3cf7b88..6a5b964 100644
> --- a/src/gallium/drivers/r600/r600d.h
> +++ b/src/gallium/drivers/r600/r600d.h
> @@ -841,6 +841,9 @@
> #define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6)
> #define G_02880C_KILL_ENABLE(x) (((x) >> 6) & 0x1)
> #define C_02880C_KILL_ENABLE 0xFFFFFFBF
> +#define S_02880C_MASK_EXPORT_ENABLE(x) (((x) & 0x1) << 8)
> +#define G_02880C_MASK_EXPORT_ENABLE(x) (((x) >> 8) & 0x1)
> +#define C_02880C_MASK_EXPORT_ENABLE 0xFFFFFEFF
> #define S_02880C_DUAL_EXPORT_ENABLE(x) (((x) & 0x1) << 9)
> #define G_02880C_DUAL_EXPORT_ENABLE(x) (((x) >> 9) & 0x1)
> #define C_02880C_DUAL_EXPORT_ENABLE 0xFFFFFDFF
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> index 346ccc9..d787e5b 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> @@ -147,25 +147,28 @@ int bc_parser::parse_decls() {
> bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
> && sh->target == TARGET_PS;
>
> - unsigned linear = 0, persp = 0, centroid = 1;
> + bool ij_interpolators[6];
> + memset(ij_interpolators, 0, sizeof(ij_interpolators));
>
> for (unsigned i = 0; i < pshader->ninput; ++i) {
> r600_shader_io & in = pshader->input[i];
> bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
> sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
> if (ps_interp && in.spi_sid) {
> - if (in.interpolate == TGSI_INTERPOLATE_LINEAR ||
> - in.interpolate == TGSI_INTERPOLATE_COLOR)
> - linear = 1;
> - else if (in.interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
> - persp = 1;
> - if (in.centroid)
> - centroid = 2;
> + int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
> + if (k >= 0)
> + ij_interpolators[k] |= true;
> }
> }
>
> if (ps_interp) {
> - unsigned mask = (1 << (2 * (linear + persp) * centroid)) - 1;
> + /* add the egcm ij interpolators to live inputs */
> + unsigned num_ij = 0;
> + for (unsigned i = 0; i < Elements(ij_interpolators); i++) {
> + num_ij += ij_interpolators[i];
> + }
> +
> + unsigned mask = (1 << (2 * num_ij)) - 1;
> unsigned gpr = 0;
>
> while (mask) {
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list