[Mesa-dev] [PATCH V5 6/6] i965/fs: add support for ir_*_interpolate_at_* expressions

Kenneth Graunke kenneth at whitecape.org
Sat Jul 12 14:03:35 PDT 2014


On Saturday, July 12, 2014 09:16:25 PM Chris Forbes wrote:
> SIMD8-only for now.
> 
> V5: - Fix style complaints
>     - Move prototype to be with other oddball emit functions
>     - Use unreachable() instead of assert() where possible
> 
> Signed-off-by: Chris Forbes <chrisf at ijw.co.nz>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h           |   2 +
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 133 
++++++++++++++++++++++++++-
>  2 files changed, 133 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
> index 1d3f9d0..6f169dc 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -439,6 +439,8 @@ public:
>     void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
>                                    fs_reg offset);
>  
> +   void emit_interpolate_expression(ir_expression *ir);
> +
>     bool try_rewrite_rhs_to_dst(ir_assignment *ir,
>  			       fs_reg dst,
>  			       fs_reg src,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 8e8affa..fed5f28 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -344,6 +344,116 @@ fs_visitor::try_emit_mad(ir_expression *ir)
>     return true;
>  }
>  
> +static int
> +pack_pixel_offset(float x)
> +{
> +   int n = MIN2((int)(x * 16), 7);
> +   return n & 0xf;
> +}
> +
> +void
> +fs_visitor::emit_interpolate_expression(ir_expression *ir)
> +{
> +   /* in SIMD16 mode, the pixel interpolator returns coords interleaved
> +    * 8 channels at a time, same as the barycentric coords presented in
> +    * the FS payload. this requires a bit of extra work to support.
> +    */
> +   no16("interpolate_at_* not yet supported in SIMD16 mode.");
> +
> +   ir_dereference * deref = ir->operands[0]->as_dereference();
> +   ir_swizzle * swiz = NULL;
> +   if (!deref) {
> +      /* the api does not allow a swizzle here, but the varying packing 
code
> +       * may have pushed one into here.
> +       */
> +      swiz = ir->operands[0]->as_swizzle();
> +      assert(swiz);
> +      deref = swiz->val->as_dereference();
> +   }
> +   assert(deref);
> +   ir_variable * var = deref->variable_referenced();
> +   assert(var);
> +
> +   /* 1. collect interpolation factors */
> +
> +   fs_reg dst_x = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 
2, 1));
> +   fs_reg dst_y = dst_x;
> +   dst_y.reg_offset++;
> +
> +   /* for most messages, we need one reg of ignored data; the hardware 
requires mlen==1
> +    * even when there is no payload. in the per-slot offset case, we'll 
replace this with
> +    * the proper source data. */
> +   fs_reg src = fs_reg(this, glsl_type::float_type);
> +   int mlen = 1;     /* one reg unless overriden */
> +   fs_inst *inst;
> +
> +   switch (ir->operation) {
> +   case ir_unop_interpolate_at_centroid:
> +      inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, 
fs_reg(0u));
> +      break;
> +
> +   case ir_binop_interpolate_at_sample: {
> +      ir_constant *sample_num = ir->operands[1]->as_constant();
> +      assert(sample_num || !"nonconstant sample number should have been 
lowered.");
> +
> +      unsigned msg_data = sample_num->value.i[0] << 4;
> +      inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, 
fs_reg(msg_data));
> +      break;
> +   }
> +
> +   case ir_binop_interpolate_at_offset: {
> +      ir_constant *const_offset = ir->operands[1]->as_constant();
> +      if (const_offset) {
> +         unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
> +                            (pack_pixel_offset(const_offset->value.f[1]) << 
4);
> +         inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
> +                     fs_reg(msg_data));
> +      } else {
> +         /* pack the operands: hw wants offsets as 4 bit signed ints */
> +         ir->operands[1]->accept(this);
> +         src = fs_reg(this, glsl_type::ivec2_type);
> +         fs_reg src2 = src;
> +         for (int i = 0; i < 2; i++) {
> +            fs_reg temp = fs_reg(this, glsl_type::float_type);
> +            emit(MUL(temp, this->result, fs_reg(16.0f)));
> +            emit(MOV(src2, temp));  /* float to int */
> +            fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7));
> +            inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */

There's something unusual going on here that I think deserves a comment:

GL_ARB_gpu_shader5 requires GL_MIN_FRAGMENT_INTERPOLATION_OFFSET and 
GL_MAX_FRAGMENT_INTERPOLATION_OFFSET to be -0.5 and 0.5, respectively; offsets 
outside of that range produce undefined values.

However, our hardware actually only supports -8/16 to 7/16 = [-0.5, 0.4375].
So if the application used an offset of +0.5, the integer value would be +8 
aka 0b1000, which when interpreted as an S4 2's complement number would be -8, 
aka -8/16 = -0.5.  The exact opposite of what they wanted.

So, the clamp here is not to try and deal with offsets that are out of the 
advertised range - it's to make +0.5 turn into +0.4375.  Which I believe is 
legal due to the fuzzy quantization rules in ARB_gpu_shader5:

"Not all values of <offset> may be supported; x and y offsets may be rounded
 to fixed-point values with the number of fraction bits given by the
 implementation-dependent constant FRAGMENT_INTERPOLATION_OFFSET_BITS."

> +
> +            src2.reg_offset++;
> +            this->result.reg_offset++;
> +         }
> +
> +         mlen = 2 * dispatch_width / 8;

Normally we create "int reg_width = dispatch_width / 8" and use that in 
expressions like this.  Might be a touch tidier.  Either way's fine though.

With some sort of comment added explaining the above, this series gets a:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

and I think you're good to go on it.  Nice work!

> +         inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
> +                     fs_reg(0u));
> +      }
> +      break;
> +   }
> +
> +   default:
> +      unreachable("not reached");
> +   }
> +
> +   inst->mlen = mlen;
> +   inst->regs_written = 2 * dispatch_width / 8; /* 2 floats per slot 
returned */
> +   inst->pi_noperspective = var->determine_interpolation_mode(key-
>flat_shade) ==
> +         INTERP_QUALIFIER_NOPERSPECTIVE;
> +
> +   /* 2. emit linterp */
> +
> +   fs_reg res(this, ir->type);
> +   this->result = res;
> +
> +   for (int i = 0; i < ir->type->vector_elements; i++) {
> +      int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
> +      emit(FS_OPCODE_LINTERP, res,
> +           dst_x, dst_y,
> +           fs_reg(interp_reg(var->data.location, ch)));
> +      res.reg_offset++;
> +   }
> +}
> +
>  void
>  fs_visitor::visit(ir_expression *ir)
>  {
> @@ -355,9 +465,22 @@ fs_visitor::visit(ir_expression *ir)
>  
>     if (try_emit_saturate(ir))
>        return;
> -   if (ir->operation == ir_binop_add) {
> +
> +   /* Deal with the real oddball stuff first */
> +   switch (ir->operation) {
> +   case ir_binop_add:
>        if (try_emit_mad(ir))
> -	 return;
> +         return;
> +      break;
> +
> +   case ir_unop_interpolate_at_centroid:
> +   case ir_binop_interpolate_at_offset:
> +   case ir_binop_interpolate_at_sample:
> +      emit_interpolate_expression(ir);
> +      return;
> +
> +   default:
> +      break;
>     }
>  
>     for (operand = 0; operand < ir->get_num_operands(); operand++) {
> @@ -815,6 +938,12 @@ fs_visitor::visit(ir_expression *ir)
>        inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]);
>        inst->predicate = BRW_PREDICATE_NORMAL;
>        break;
> +
> +   case ir_unop_interpolate_at_centroid:
> +   case ir_binop_interpolate_at_offset:
> +   case ir_binop_interpolate_at_sample:
> +      unreachable("already handled above");
> +      break;
>     }
>  }
>  
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140712/dda5bb5f/attachment.sig>


More information about the mesa-dev mailing list