[Mesa-dev] [PATCH V2 08/12] i965: Add FS backend for builtin gl_SampleID
Paul Berry
stereotype441 at gmail.com
Mon Oct 28 23:23:38 CET 2013
On 25 October 2013 16:45, Anuj Phogat <anuj.phogat at gmail.com> wrote:
> V2:
> - Update comments
> - Make changes to support simd16 mode.
> - Add compute_sample_id variables in brw_wm_prog_key
> - Add a special backend instruction to compute sample_id.
>
> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
> ---
> src/mesa/drivers/dri/i965/brw_defines.h | 1 +
> src/mesa/drivers/dri/i965/brw_fs.cpp | 49
> ++++++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_fs.h | 7 ++++
> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 ++++++++++++++
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 ++
> src/mesa/drivers/dri/i965/brw_wm.c | 6 ++++
> src/mesa/drivers/dri/i965/brw_wm.h | 1 +
> 7 files changed, 93 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 5ba9d45..f3c994b 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -788,6 +788,7 @@ enum opcode {
> FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
> FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
> FS_OPCODE_DISCARD_JUMP,
> + FS_OPCODE_SET_SAMPLE_ID,
> FS_OPCODE_SET_SIMD4X2_OFFSET,
> FS_OPCODE_PACK_HALF_2x16_SPLIT,
> FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 0f8213e..5773c6f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1176,6 +1176,55 @@ fs_visitor::emit_samplepos_setup(ir_variable *ir)
> return reg;
> }
>
> +fs_reg *
> +fs_visitor::emit_sampleid_setup(ir_variable *ir)
> +{
> + assert(brw->gen >= 6);
> +
> + this->current_annotation = "compute sample id";
> + fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
> +
> + if (c->key.compute_sample_id) {
> + fs_reg t1 = fs_reg(this, glsl_type::int_type);
> + fs_reg t2 = fs_reg(this, glsl_type::int_type);
> + t2.type = BRW_REGISTER_TYPE_UW;
> +
> + /* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
> + * 8x multisampling, subspan 0 will represent sample N (where N
> + * is 0, 2, 4 or 6), subspan 1 will represent sample 1, 3, 5 or
> + * 7. We can find the value of N by looking at R0.0 bits 7:6
> + * ("Starting Sample Pair Index (SSPI)") and multiplying by two
> + * (since samples are always delivered in pairs). That is, we
> + * compute 2*((R0.0 & 0xc0) >> 6) == (R0.0 & 0xc0) >> 5. Then
> + * we need to add N to the sequence (0, 0, 0, 0, 1, 1, 1, 1) in
> + * case of SIMD8 and sequence (0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
> + * 2, 3, 3, 3, 3) in case of SIMD16. We compute this sequence by
> + * populating a temporary variable with the sequence (0, 1, 2, 3),
> + * and then reading from it using vstride=1, width=4, hstride=0.
> + * These computations hold good for 4x multisampling as well.
> + */
> + emit(BRW_OPCODE_AND, t1,
> + fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
> + fs_reg(brw_imm_d(0xc0)));
> + emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
> + /* This works for both SIMD8 and SIMD16 */
> + emit(MOV(t2, brw_imm_v(0x3210)));
>
These three instructions need to be emitted with force_writemask_all =
true, just in case the fragment shader is dispatched with channel 0
disabled.
> + /* This special instruction takes care of setting vstride=1,
> + * width=4, hstride=0 of t2 during an ADD instruction.
> + */
> + emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
>
I don't believe this will work, because type conversions can only be done
in MOV instructions. reg and t1 are UD, but FS_OPCODE_SET_SAMPLE_ID is
going to cast t2 as UW. Have you tried running this code in the simulator
to see if it complains?
I believe what you need to do is use a MOV to copy the (0, 1, 2, 3)
sequence to a full-size UD register, and then do the ADD as a final
instruction.
(Note that the reason this wasn't necessary in blorp is that blorp uses UW
for a lot of its temporary registers, so in the corresponding blorp code
there's no type conversion going on).
> + }
> + else {
>
Style nit-pick: generally we put "} else {" on a single line.
> + /* As per GL_ARB_sample_shading specification:
> + * "When rendering to a non-multisample buffer, or if multisample
> + * rasterization is disabled, gl_SampleID will always be zero."
> + */
> + emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
> + }
> +
> + return reg;
> +}
> +
> fs_reg
> fs_visitor::fix_math_operand(fs_reg src)
> {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index db5df39..8a1a414 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -334,6 +334,7 @@ public:
> bool is_centroid);
> fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
> fs_reg *emit_samplepos_setup(ir_variable *ir);
> + fs_reg *emit_sampleid_setup(ir_variable *ir);
> fs_reg *emit_general_interpolation(ir_variable *ir);
> void emit_interpolation_setup_gen4();
> void emit_interpolation_setup_gen6();
> @@ -538,6 +539,12 @@ private:
> struct brw_reg index,
> struct brw_reg offset);
> void generate_mov_dispatch_to_flags(fs_inst *inst);
> +
> + void generate_set_sample_id(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src0,
> + struct brw_reg src1);
> +
> void generate_set_simd4x2_offset(fs_inst *inst,
> struct brw_reg dst,
> struct brw_reg offset);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index fa15f7b..4f15ed7 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -1024,6 +1024,29 @@ fs_generator::generate_set_simd4x2_offset(fs_inst
> *inst,
> brw_pop_insn_state(p);
> }
>
> +/* Sets vstride=1, width=4, hstride=0 of register src1 during
> + * the ADD instruction.
> + */
> +void
> +fs_generator::generate_set_sample_id(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src0,
> + struct brw_reg src1)
> +{
> + assert(dst.type == BRW_REGISTER_TYPE_D ||
> + dst.type == BRW_REGISTER_TYPE_UD);
> + assert(src0.type == BRW_REGISTER_TYPE_D ||
> + src0.type == BRW_REGISTER_TYPE_UD);
> + if (dispatch_width == 16)
> + dst = vec16(dst);
> + brw_push_insn_state(p);
> + brw_set_compression_control(p, BRW_COMPRESSION_NONE);
> + brw_set_mask_control(p, BRW_MASK_DISABLE);
> + brw_ADD(p, dst, src0, stride(retype(brw_vec1_reg(src1.file, src1.nr,
> 0),
> + BRW_REGISTER_TYPE_UW), 1, 4, 0));
>
+ brw_pop_insn_state(p);
> +}
> +
> /**
> * Change the register's data type from UD to W, doubling the strides in
> order
> * to compensate for halving the data type width.
> @@ -1553,6 +1576,10 @@ fs_generator::generate_code(exec_list *instructions)
> generate_set_simd4x2_offset(inst, dst, src[0]);
> break;
>
> + case FS_OPCODE_SET_SAMPLE_ID:
> + generate_set_sample_id(inst, dst, src[0], src[1]);
> + break;
> +
> case FS_OPCODE_PACK_HALF_2x16_SPLIT:
> generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
> break;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 51972fe..7a6a0b5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -129,6 +129,8 @@ fs_visitor::visit(ir_variable *ir)
> } else if (ir->mode == ir_var_system_value) {
> if (!strcmp(ir->name, "gl_SamplePosition")) {
> reg = emit_samplepos_setup(ir);
> + } else if (!strcmp(ir->name, "gl_SampleID")) {
> + reg = emit_sampleid_setup(ir);
>
As in the previous patch, I believe you can just use ir->location rather
than doing a strcmp on the name.
> }
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index d2a5a9f..afb62fb 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -367,6 +367,7 @@ static void brw_wm_populate_key( struct brw_context
> *brw,
> GLuint lookup = 0;
> GLuint line_aa;
> bool program_uses_dfdy = fp->program.UsesDFdy;
> + bool multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
>
> memset(key, 0, sizeof(*key));
>
> @@ -489,6 +490,11 @@ static void brw_wm_populate_key( struct brw_context
> *brw,
> _mesa_get_min_invocations_per_fragment(ctx, &fp->program) > 1 &&
> fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
>
> + key->compute_sample_id =
> + multisample_fbo &&
> + ctx->Multisample.Enabled &&
> + fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID;
>
I always forget the precedence rules between & and &&. Can we put
parenthesis around (fp->program.Base.SystemValuesRead &
SYSTEM_BIT_SAMPLE_ID) just to be on the safe side?
> +
> /* BRW_NEW_VUE_MAP_GEOM_OUT */
> if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
> BRW_FS_VARYING_INPUT_MASK) > 16)
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.h
> b/src/mesa/drivers/dri/i965/brw_wm.h
> index eb740ad..f5823f4 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.h
> +++ b/src/mesa/drivers/dri/i965/brw_wm.h
> @@ -66,6 +66,7 @@ struct brw_wm_prog_key {
> GLuint render_to_fbo:1;
> GLuint clamp_fragment_color:1;
> GLuint compute_pos_offset:1;
> + GLuint compute_sample_id:1;
> GLuint line_aa:2;
> GLuint high_quality_derivatives:1;
>
> --
> 1.8.1.4
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131028/dcb4e981/attachment-0001.html>
More information about the mesa-dev
mailing list