[Mesa-dev] [PATCH v3 2/2] i965/fs: Handle non-const sample number in interpolateAtSample

Francisco Jerez currojerez at riseup.net
Tue Oct 6 03:15:53 PDT 2015


Neil Roberts <neil at linux.intel.com> writes:

> If a non-const sample number is given to interpolateAtSample it will
> now generate an indirect send message with the sample ID similar to
> how non-const sampler array indexing works. Previously non-const
> values were ignored and instead it ended up using a constant 0 value.
>
> The generator will try to determine if the sample ID is dynamically
> uniform via nir_src_is_dynamically_uniform. If not it will query the
> pixel interpolator in a loop, once for each possible sample number.
> This is necessary because the indirect send message doesn't seem to
> have a way to specify a different value for each fragment.
>
> The range of possible sample numbers is determined using
> STATE_NUM_SAMPLES. When linking the shader it will now add a reference
> to this state if any dynamically non-uniform calls to
> interpolateAtSample are found.
>
> This fixes the following two Piglit tests:
>
> arb_gpu_shader5-interpolateAtSample-nonconst
> arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform
>
> v2: Handle dynamically non-uniform sample ids.
> v3: Remove the BREAK instruction and predicate the WHILE directly.
>     Make the tokens arrays const.
> ---
>
> All three patches are also available in a branch here:
>
> https://github.com/bpeel/mesa/tree/wip/nonconst-interpolate-at-sample
>
>  src/mesa/drivers/dri/i965/brw_eu.h             |   2 +-
>  src/mesa/drivers/dri/i965/brw_eu_emit.c        |  34 ++++---
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp |   5 +-
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp       | 127 +++++++++++++++++++++----
>  src/mesa/drivers/dri/i965/brw_program.c        |  54 +++++++++++
>  src/mesa/drivers/dri/i965/brw_program.h        |   1 +
>  src/mesa/drivers/dri/i965/brw_shader.cpp       |   2 +
>  7 files changed, 193 insertions(+), 32 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
> index 761aa0e..0ac1ad9 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
>                               struct brw_reg mrf,
>                               bool noperspective,
>                               unsigned mode,
> -                             unsigned data,
> +                             struct brw_reg data,
>                               unsigned msg_length,
>                               unsigned response_length);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index dc699bb..9c38e99 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -3212,26 +3212,38 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
>                               struct brw_reg mrf,
>                               bool noperspective,
>                               unsigned mode,
> -                             unsigned data,
> +                             struct brw_reg data,
>                               unsigned msg_length,
>                               unsigned response_length)
>  {
>     const struct brw_device_info *devinfo = p->devinfo;
> -   struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
> +   struct brw_inst *insn;
> +   uint16_t exec_size;
>  
> -   brw_set_dest(p, insn, dest);
> -   brw_set_src0(p, insn, mrf);
> -   brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
> -                              msg_length, response_length,
> -                              false /* header is never present for PI */,
> -                              false);
> +   if (data.file == BRW_IMMEDIATE_VALUE) {
> +      insn = next_insn(p, BRW_OPCODE_SEND);
> +      brw_set_dest(p, insn, dest);
> +      brw_set_src0(p, insn, mrf);
> +      brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
> +                                 msg_length, response_length,
> +                                 false /* header is never present for PI */,
> +                                 false);
> +      brw_inst_set_pi_message_data(devinfo, insn, data.dw1.ud);
> +   } else {
> +      insn = brw_send_indirect_message(p,
> +                                       GEN7_SFID_PIXEL_INTERPOLATOR,
> +                                       dest,
> +                                       mrf,
> +                                       vec1(data));
> +      brw_inst_set_mlen(devinfo, insn, msg_length);
> +      brw_inst_set_rlen(devinfo, insn, response_length);
> +   }
>  
> -   brw_inst_set_pi_simd_mode(
> -         devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
> +   exec_size = brw_inst_exec_size(devinfo, p->current);
> +   brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
>     brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
>     brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
>     brw_inst_set_pi_message_type(devinfo, insn, mode);
> -   brw_inst_set_pi_message_data(devinfo, insn, data);
>  }
>  
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 6f8b75e..17e19cf 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -1377,15 +1377,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
>                                                  struct brw_reg msg_data,
>                                                  unsigned msg_type)
>  {
> -   assert(msg_data.file == BRW_IMMEDIATE_VALUE &&
> -          msg_data.type == BRW_REGISTER_TYPE_UD);
> +   assert(msg_data.type == BRW_REGISTER_TYPE_UD);
>  
>     brw_pixel_interpolator_query(p,
>           retype(dst, BRW_REGISTER_TYPE_UW),
>           src,
>           inst->pi_noperspective,
>           msg_type,
> -         msg_data.dw1.ud,
> +         msg_data,
>           inst->mlen,
>           inst->regs_written);
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 03fe680..0625e44 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1180,6 +1180,47 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
>     }
>  }
>  
> +/* For most messages, we need one reg of ignored data; the hardware requires
> + * mlen==1 even when there is no payload. in the per-slot offset case, we'll
> + * replace this with the proper source data.
> + */
> +static void
> +setup_pixel_interpolater_instruction(fs_visitor *v,
> +                                     nir_intrinsic_instr *instr,
> +                                     fs_inst *inst,
> +                                     int mlen = 1)
> +{
> +   inst->mlen = mlen;
> +   /* 2 floats per slot returned */
> +   inst->regs_written = 2 * v->dispatch_width / 8;
> +   inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
> +      INTERP_QUALIFIER_NOPERSPECTIVE;
> +}
> +
> +static fs_reg
> +get_num_samples_reg(fs_visitor *v)
> +{
> +   struct gl_program_parameter_list *params = v->prog->Parameters;
> +   static const gl_state_index tokens[STATE_LENGTH] = {
> +      STATE_NUM_SAMPLES
> +   };
> +   GLuint index = _mesa_add_state_reference(params, tokens);
> +   unsigned i;
> +
> +   /* Try to find an existing copy of the uniform */
> +   for (i = 0; i < v->uniforms; i++) {
> +      if (v->stage_prog_data->param[i] ==
> +          &v->prog->Parameters->ParameterValues[index][0])
> +         goto found;
> +   }
> +
> +   v->stage_prog_data->param[v->uniforms++] =
> +      &v->prog->Parameters->ParameterValues[index][0];
> +
> +found:
> +   return retype(fs_reg(UNIFORM, i), BRW_REGISTER_TYPE_UD);
> +}
> +

I think this (or rather our uniform handling) is flawed in the same way
as fs_visitor::rescale_texcoord().  The UNIFORM file maps one-to-one to
the brw_stage_prog_data::param array during the SIMD8 compilation,
however before the SIMD16 compilation is run uniforms are split into
pull and push params and re-packed.  After that point the UNIFORM file
maps to either ::param or ::pull_param according to either the push_ or
pull_constant_loc[] remapping tables depending on which one of the two
is non-negative...  Sigh...

>  void
>  fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
>  {
> @@ -1584,27 +1625,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
>  
>        fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
>  
> -      /* For most messages, we need one reg of ignored data; the hardware
> -       * requires mlen==1 even when there is no payload. in the per-slot
> -       * offset case, we'll replace this with the proper source data.
> -       */
>        fs_reg src = vgrf(glsl_type::float_type);
> -      int mlen = 1;     /* one reg unless overriden */
>        fs_inst *inst;
>  
>        switch (instr->intrinsic) {
>        case nir_intrinsic_interp_var_at_centroid:
>           inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
>                           dst_xy, src, fs_reg(0u));
> +         setup_pixel_interpolater_instruction(this, instr, inst);
>           break;
>  
>        case nir_intrinsic_interp_var_at_sample: {
> -         /* XXX: We should probably handle non-constant sample id's */
>           nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
> -         assert(const_sample);
> -         unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
> -         inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
> -                         fs_reg(msg_data));
> +
> +         if (const_sample) {
> +            unsigned msg_data = const_sample->i[0] << 4;
> +
> +            inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
> +                            fs_reg(msg_data));
> +
> +            setup_pixel_interpolater_instruction(this, instr, inst);
> +         } else {
> +            fs_reg sample_src = retype(get_nir_src(instr->src[0]),
> +                                       BRW_REGISTER_TYPE_UD);
> +            fs_reg sample_id_reg;
> +
> +            if (nir_src_is_dynamically_uniform(instr->src[0])) {
> +               sample_id_reg = vgrf(glsl_type::uint_type);
> +               bld.SHL(sample_id_reg, sample_src, fs_reg(4u));
> +               sample_id_reg = bld.emit_uniformize(sample_id_reg);
> +               inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
> +                               sample_id_reg);
> +               setup_pixel_interpolater_instruction(this, instr, inst);
> +            } else {
> +               /* Make a loop that sends a message to the pixel interpolator
> +                * for each possible sample number so that each individual
> +                * message will be dynamically uniform. The number of samples
> +                * is determined by accessing the STATE_NUM_SAMPLES state var.
> +                */
> +               fs_reg i_reg = vgrf(glsl_type::uint_type);
> +               fs_reg sample_id_reg = vgrf(glsl_type::uint_type);
> +               fs_reg num_samples_reg = get_num_samples_reg(this);
> +
> +               bld.ADD(retype(i_reg, BRW_REGISTER_TYPE_D),
> +                       retype(num_samples_reg, BRW_REGISTER_TYPE_D),
> +                       fs_reg(-1));
> +
> +               bld.emit(BRW_OPCODE_DO);
> +
> +               bld.CMP(bld.null_reg_ud(),
> +                       sample_src, i_reg,
> +                       BRW_CONDITIONAL_EQ);
> +               bld.IF(BRW_PREDICATE_NORMAL);
> +               bld.SHL(sample_id_reg, i_reg, fs_reg(4u));
> +               sample_id_reg = bld.emit_uniformize(sample_id_reg);
> +               inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
> +                               sample_id_reg);
> +               setup_pixel_interpolater_instruction(this, instr, inst);
> +               bld.emit(BRW_OPCODE_ENDIF);
> +
> +               /* The conditional flag on instructions other than CMP* compare
> +                * the *result* of the operation to zero rather than comparing
> +                * the two sources.
> +                */
> +               set_condmod(BRW_CONDITIONAL_GE,
> +                           bld.ADD(retype(i_reg, BRW_REGISTER_TYPE_D),
> +                                   retype(i_reg, BRW_REGISTER_TYPE_D),
> +                                   fs_reg(-1)));
> +
> +               /* The predicate means that the loop will continue if i is now
> +                * >= 0
> +                */
> +               set_predicate(BRW_PREDICATE_NORMAL, bld.emit(BRW_OPCODE_WHILE));
> +            }
> +         }
> +
>           break;
>        }
>  
> @@ -1617,6 +1712,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
>  
>              inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
>                              fs_reg(off_x | (off_y << 4)));
> +            setup_pixel_interpolater_instruction(this, instr, inst);
>           } else {
>              src = vgrf(glsl_type::ivec2_type);
>              fs_reg offset_src = retype(get_nir_src(instr->src[0]),
> @@ -1646,9 +1742,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
>                             bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
>              }
>  
> -            mlen = 2 * dispatch_width / 8;
>              inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
>                              fs_reg(0u));
> +            setup_pixel_interpolater_instruction(this,
> +                                                 instr,
> +                                                 inst,
> +                                                 2 * dispatch_width / 8);
>           }
>           break;
>        }
> @@ -1657,12 +1756,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
>           unreachable("Invalid intrinsic");
>        }
>  
> -      inst->mlen = mlen;
> -      /* 2 floats per slot returned */
> -      inst->regs_written = 2 * dispatch_width / 8;
> -      inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
> -                               INTERP_QUALIFIER_NOPERSPECTIVE;
> -
>        for (unsigned j = 0; j < instr->num_components; j++) {
>           fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
>           src.type = dest.type;
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
> index a034dac..2d7966c 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -144,6 +144,8 @@ brwProgramStringNotify(struct gl_context *ctx,
>  
>        prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
>  
> +      brw_add_interpolate_at_sample_params(prog);
> +
>        brw_fs_precompile(ctx, NULL, prog);
>        break;
>     }
> @@ -242,6 +244,58 @@ brw_add_texrect_params(struct gl_program *prog)
>     }
>  }
>  
> +static bool
> +find_interpolate_at_sample_in_block(nir_block *block,
> +                                    void *data)
> +{
> +   nir_foreach_instr(block, instr) {
> +      if (instr->type != nir_instr_type_intrinsic)
> +         continue;
> +
> +      nir_intrinsic_instr *intrinsic_instr = nir_instr_as_intrinsic(instr);
> +
> +      if (intrinsic_instr->intrinsic != nir_intrinsic_interp_var_at_sample)
> +         continue;
> +
> +      /* If the sample number is known to be dynamically uniform then
> +       * the generator won't need the num_samples state.
> +       */
> +      if (nir_src_is_dynamically_uniform(intrinsic_instr->src[0]))
> +         continue;
> +
> +      return false;
> +   }
> +
> +   return true;
> +}
> +
> +void
> +brw_add_interpolate_at_sample_params(struct gl_program *prog)
> +{
> +   static const gl_state_index tokens[STATE_LENGTH] = {
> +      STATE_NUM_SAMPLES
> +   };
> +
> +   if (!prog->nir)
> +      return;
> +
> +   /* If anything calls interpolateAtSample with a dynamically non-uniform
> +    * sample ID then we need STATE_NUM_SAMPLES to be able to iterate over
> +    * each possible value.
> +    */
> +   nir_foreach_overload(prog->nir, overload) {
> +      if (overload->impl) {
> +         bool found = !nir_foreach_block(overload->impl,
> +                                         find_interpolate_at_sample_in_block,
> +                                         ralloc_parent(overload->impl));
> +         if (found) {
> +            _mesa_add_state_reference(prog->Parameters, tokens);
> +            break;
> +         }
> +      }
> +   }
> +}
> +
>  /* Per-thread scratch space is a power-of-two multiple of 1KB. */
>  int
>  brw_get_scratch_size(int size)
> diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
> index cf0522a..d68eecc 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.h
> +++ b/src/mesa/drivers/dri/i965/brw_program.h
> @@ -164,6 +164,7 @@ bool brw_debug_recompile_sampler_key(struct brw_context *brw,
>                                       const struct brw_sampler_prog_key_data *old_key,
>                                       const struct brw_sampler_prog_key_data *key);
>  void brw_add_texrect_params(struct gl_program *prog);
> +void brw_add_interpolate_at_sample_params(struct gl_program *prog);
>  
>  void
>  brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 7bc080b..1e98881 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -388,6 +388,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
>        prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
>                                   is_scalar_shader_stage(compiler, stage));
>  
> +      brw_add_interpolate_at_sample_params(prog);
> +
>        _mesa_reference_program(ctx, &prog, NULL);
>     }
>  
> -- 
> 1.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20151006/9bc09921/attachment-0001.sig>


More information about the mesa-dev mailing list