[Mesa-dev] [PATCH 20/21] i965/fs: Introduce scalarizing SVEC4 IR builder.
Francisco Jerez
currojerez at riseup.net
Tue Apr 28 10:08:36 PDT 2015
See "i965/fs: Introduce FS IR builder." for the rationale.
---
src/mesa/drivers/dri/i965/brw_fs_builder.h | 426 +++++++++++++++++++++++++++++
1 file changed, 426 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 6b36d1f..0368d2b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -677,6 +677,432 @@ namespace brw {
const void *base_ir;
/** @} */
};
+
+   /**
+    * Toolbox to assemble an FS IR program out of vector instructions,
+    * scalarizing them on emission.  It's meant to be largely compatible with
+    * brw::vec4_builder in order to enable generic FS/VEC4 programming.
+    *
+    * Every vector operation is expanded into one scalar instruction per
+    * component enabled in the destination writemask, emitted through the
+    * wrapped scalar builder.  The scalar instructions are handed back to the
+    * caller in the v[] array of the returned instruction, indexed by
+    * component.
+    */
+   class svec4_builder {
+   public:
+      /** Type used in this IR to represent a source of an instruction. */
+      typedef src_svec4 src_reg;
+
+      /** Type used in this IR to represent the destination of an instruction. */
+      typedef dst_svec4 dst_reg;
+
+      /** Type used in this IR to represent an instruction. */
+      typedef svec4_inst instruction;
+
+      /** You can use this to do scalar operations on the same IR. */
+      typedef fs_builder scalar_builder;
+
+      /** We build vector instructions. */
+      typedef svec4_builder vector_builder;
+
+      /**
+       * Construct a scalarizing vector builder stacked on top of a scalar
+       * builder.  The scalar builder is copied, and its devinfo pointer is
+       * re-exported as a member of this object.
+       */
+      svec4_builder(const fs_builder &bld) :
+         devinfo(bld.devinfo), bld(bld)
+      {
+      }
+
+      /**
+       * Get the scalar builder this vector builder is stacked on, which
+       * carries the same code generation parameters as this.
+       */
+      const fs_builder &
+      scalar() const
+      {
+         return bld;
+      }
+
+      /**
+       * Construct a vector builder inheriting other code generation
+       * parameters from this.  For this class that is simply a copy.
+       */
+      svec4_builder
+      vector() const
+      {
+         return *this;
+      }
+
+      /**
+       * Construct a builder of half-SIMD-width instructions inheriting other
+       * code generation parameters from this.  Predication and control flow
+       * masking will use the enable signals for the i-th half.
+       */
+      svec4_builder
+      half(unsigned i) const
+      {
+         return svec4_builder(bld.half(i));
+      }
+
+      /**
+       * Get the SIMD width in use (forwarded from the scalar builder).
+       */
+      unsigned
+      dispatch_width() const
+      {
+         return bld.dispatch_width();
+      }
+
+      /**
+       * Get the lowered predicate to be used to interpret the flag result
+       * written by a reduced SVEC4 instruction (i.e. having called
+       * brw::exec_reduce() on the instruction with \p pred as argument).
+       * This can be used to "map" an ALIGN16 predication mode into an ALIGN1
+       * mode, allowing vector comparisons in the scalar back-end.
+       *
+       * BRW_PREDICATE_NONE is passed through unchanged, any other mode is
+       * lowered to BRW_PREDICATE_NORMAL.
+       *
+       * \sa brw::exec_reduce().
+       */
+      static brw_predicate
+      reduced_predicate(brw_predicate pred)
+      {
+         return (pred == BRW_PREDICATE_NONE ? BRW_PREDICATE_NONE :
+                 BRW_PREDICATE_NORMAL);
+      }
+
+      /**
+       * Allocate a virtual register of natural vector size and SIMD width.
+       * \p n gives the amount of space to allocate in dispatch_width units
+       * (which is just enough space for one logical component in this IR).
+       */
+      dst_reg
+      natural_reg(brw_reg_type type, unsigned n = 4) const
+      {
+         return resize(dst_reg(bld.natural_reg(type, n)), n);
+      }
+
+      /**
+       * Create a register of natural vector size and SIMD width using array
+       * \p reg as storage.
+       */
+      dst_reg
+      natural_reg(const array_reg &reg) const
+      {
+         return bld.natural_reg(reg);
+      }
+
+      /**
+       * Allocate a virtual register of vector size one and natural SIMD
+       * width.  Only the X component of the result is write-enabled.
+       */
+      dst_reg
+      scalar_reg(brw_reg_type type) const
+      {
+         return dst_reg(bld.natural_reg(type), WRITEMASK_X);
+      }
+
+      /**
+       * Allocate a raw chunk of memory from the virtual GRF file with no
+       * special vector size or SIMD width.  \p n is given in units of 32B
+       * registers.
+       */
+      ::array_reg
+      array_reg(enum brw_reg_type type, unsigned n) const
+      {
+         return bld.array_reg(type, n);
+      }
+
+      /**
+       * Create a null register of floating type.
+       */
+      dst_reg
+      null_reg_f() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_F));
+      }
+
+      /**
+       * Create a null register of signed integer type.
+       */
+      dst_reg
+      null_reg_d() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_D));
+      }
+
+      /**
+       * Create a null register of unsigned integer type.
+       */
+      dst_reg
+      null_reg_ud() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_UD));
+      }
+
+      /**
+       * Create and insert a nullary control instruction into the program.
+       * A single scalar instruction is emitted and stored in the first slot
+       * of the result; the remaining slots are not assigned here.
+       */
+      instruction *
+      emit(enum opcode opcode) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+         inst->v[0] = bld.emit(opcode);
+         return inst;
+      }
+
+      /**
+       * Create and insert a nullary instruction into the program.  One
+       * scalar instruction is emitted per component enabled in the writemask
+       * of \p dst; slots of disabled components are not assigned here.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+
+         /* Iterate over the slots of the instruction rather than a literal
+          * component count, like the remaining emit() overloads do.
+          */
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.emit(opcode, component(dst, i));
+         }
+
+         return inst;
+      }
+
+      /**
+       * Create and insert a unary instruction into the program.  One scalar
+       * instruction is emitted per component enabled in the writemask of
+       * \p dst, reading the matching component of \p src0.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.emit(opcode, component(dst, i),
+                                     component(src0, i));
+         }
+
+         return inst;
+      }
+
+      /**
+       * Create and insert a binary instruction into the program.  One scalar
+       * instruction is emitted per component enabled in the writemask of
+       * \p dst, reading the matching component of each source.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.emit(opcode, component(dst, i),
+                                     component(src0, i), component(src1, i));
+         }
+
+         return inst;
+      }
+
+      /**
+       * Create and insert a ternary instruction into the program.  One
+       * scalar instruction is emitted per component enabled in the writemask
+       * of \p dst, reading the matching component of each source.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1, const src_reg &src2) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.emit(opcode, component(dst, i),
+                                     component(src0, i), component(src1, i),
+                                     component(src2, i));
+         }
+
+         return inst;
+      }
+
+      /**
+       * Insert a preallocated instruction into the program.  Only the
+       * non-null scalar instructions in its v[] array are inserted, in
+       * component order.
+       */
+      instruction *
+      emit(instruction *inst) const
+      {
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            if (inst->v[i])
+               bld.emit(inst->v[i]);
+         }
+
+         return inst;
+      }
+
+      /**
+       * Select \p src0 if the comparison of both sources with the given
+       * conditional mod evaluates to true, otherwise select \p src1.
+       * Applied independently to each component enabled in the writemask of
+       * \p dst.
+       *
+       * Generally useful to get the minimum or maximum of two values.
+       */
+      void
+      emit_minmax(const dst_reg &dst, const src_reg &src0,
+                  const src_reg &src1, brw_conditional_mod mod) const
+      {
+         for (unsigned i = 0; i < ARRAY_SIZE(svec4_inst::v); ++i) {
+            if (dst.writemask & (1 << i))
+               bld.emit_minmax(component(dst, i), component(src0, i),
+                               component(src1, i), mod);
+         }
+      }
+
+      /**
+       * Copy any live channel from \p src0 to the first channel of \p dst.
+       * Applied independently to each component enabled in the writemask of
+       * \p dst.
+       */
+      void
+      emit_uniformize(const dst_reg &dst, const src_reg &src0) const
+      {
+         for (unsigned i = 0; i < ARRAY_SIZE(svec4_inst::v); ++i) {
+            if (dst.writemask & (1 << i))
+               bld.emit_uniformize(component(dst, i), component(src0, i));
+         }
+      }
+
+      /**
+       * Assorted arithmetic ops.  Each one forwards to the emit() overload
+       * of matching arity with the BRW_OPCODE_* opcode of the same name.
+       * @{
+       */
+#define ALU1(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0) const                 \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0);                       \
+      }
+
+#define ALU2(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
+      }
+
+#define ALU3(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
+         const src_reg &src2) const                                     \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
+      }
+
+      ALU2(ADD)
+      ALU2(AND)
+      ALU2(ASR)
+      ALU2(AVG)
+      ALU3(BFE)
+      ALU2(BFI1)
+      ALU3(BFI2)
+      ALU1(BFREV)
+      ALU1(CBIT)
+      ALU2(CMPN)
+      ALU3(CSEL)
+      ALU2(DP2)
+      ALU2(DP3)
+      ALU2(DP4)
+      ALU2(DPH)
+      ALU1(F16TO32)
+      ALU1(F32TO16)
+      ALU1(FBH)
+      ALU1(FBL)
+      ALU1(FRC)
+      ALU2(LINE)
+      ALU1(LZD)
+      ALU2(MAC)
+      ALU3(MAD)
+      ALU1(MOV)
+      ALU2(MUL)
+      ALU1(NOT)
+      ALU2(OR)
+      ALU2(PLN)
+      ALU1(RNDD)
+      ALU1(RNDE)
+      ALU1(RNDU)
+      ALU1(RNDZ)
+      ALU2(SAD2)
+      ALU2(SEL)
+      ALU2(SHL)
+      ALU2(SHR)
+      ALU2(XOR)
+
+#undef ALU3
+#undef ALU2
+#undef ALU1
+      /** @} */
+
+      /**
+       * CMP: Sets the low bit of the destination channels with the result
+       * of the comparison, while the upper bits are undefined, and updates
+       * the flag register with the packed 16 bits of the result.
+       *
+       * One scalar CMP is emitted per component enabled in the writemask of
+       * \p dst and recorded in the matching slot of the result.
+       */
+      instruction *
+      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+          brw_conditional_mod condition) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            /* Keep track of the scalar instruction so the caller can still
+             * modify it (e.g. predicate it) through the returned object.
+             */
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.CMP(component(dst, i), component(src0, i),
+                                    component(src1, i), condition);
+         }
+
+         return inst;
+      }
+
+      /**
+       * Gen4 predicated IF.  The single scalar instruction emitted is stored
+       * in the first slot of the result.
+       */
+      instruction *
+      IF(brw_predicate predicate) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+         inst->v[0] = bld.IF(predicate);
+         return inst;
+      }
+
+      /**
+       * Gen6 IF with embedded comparison.  Both sources are required to have
+       * a single-value swizzle; the comparison is done on their first
+       * component and the single scalar instruction emitted is stored in
+       * the first slot of the result.
+       */
+      instruction *
+      IF(const src_reg &src0, const src_reg &src1,
+         brw_conditional_mod condition) const
+      {
+         assert(brw_is_single_value_swizzle(src0.swizzle) &&
+                brw_is_single_value_swizzle(src1.swizzle));
+         instruction *inst = new(bld.mem_ctx) instruction;
+         inst->v[0] = bld.IF(component(src0, 0), component(src1, 0), condition);
+         return inst;
+      }
+
+      /**
+       * Emit a linear interpolation instruction.  One scalar LRP is emitted
+       * per component enabled in the writemask of \p dst and recorded in the
+       * matching slot of the result.
+       */
+      instruction *
+      LRP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+          const src_reg &src2) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            /* Keep track of the scalar instruction so the caller can still
+             * modify it through the returned object.
+             */
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.LRP(component(dst, i), component(src0, i),
+                                    component(src1, i), component(src2, i));
+         }
+
+         return inst;
+      }
+
+      /** Device information inherited from the scalar builder. */
+      const brw_device_info *const devinfo;
+
+   private:
+      /** Scalar builder every component instruction is emitted through. */
+      fs_builder bld;
+   };
+
+   /**
+    * Construct a vector builder inheriting other code generation parameters
+    * from this scalar builder.
+    *
+    * Defined out of line because it needs the complete definition of the
+    * vector builder type, and marked inline because this definition lives
+    * in a header, which would otherwise yield one external definition per
+    * translation unit including it.
+    */
+   inline svec4_builder
+   fs_builder::vector() const
+   {
+      return vector_builder(*this);
+   }
}
#endif
--
2.3.5
More information about the mesa-dev
mailing list