[Mesa-dev] [PATCH 09/23] i965: Import surface message builder functions.

Tue Apr 28 11:44:20 PDT 2015

Implement helper functions that can be used to construct and send
untyped and typed surface read, write and atomic messages to the
shared dataport unit.
---
 src/mesa/drivers/dri/i965/brw_ir_surface_builder.h | 268 +++++++++++++++++++++
 1 file changed, 268 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
index b6890b4..e24e484 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
@@ -283,6 +283,274 @@ namespace brw {
                        size);
       }
    }
+
+   namespace surface_access {
+      namespace detail {
+         using namespace array_utils;
+
+         /**
+          * Emit surface message \p opcode on \p payload with component 0 of
+          * \p surface and \p arg.  Returns \p rlen registers or a null reg.
+          */
+         template<typename B, typename S>
+         array_reg
+         emit_send(const B &vbld, enum opcode opcode,
+                   const array_reg &payload, const S &surface, const S &arg,
+                   unsigned rlen, brw_predicate pred = BRW_PREDICATE_NONE)
+         {
+            const typename B::scalar_builder bld = vbld.scalar();
+            const typename B::scalar_builder::dst_reg usurface =
+               bld.scalar_reg(BRW_REGISTER_TYPE_UD);
+            const array_reg dst =
+               (rlen ? bld.array_reg(BRW_REGISTER_TYPE_UD, rlen) :
+                array_reg(bld.null_reg_ud()));  /* when rlen == 0 */
+
+            /* Uniformize component 0 of the dynamically uniform surface
+             * index into the scalar register usurface.
+             */
+            bld.emit_uniformize(usurface, component(surface, 0));
+
+            typename B::scalar_builder::instruction *inst =
+               bld.emit(opcode, bld.natural_reg(dst), bld.natural_reg(payload),
+                        usurface, component(arg, 0));
+            inst->mlen = payload.size;
+            inst->regs_written = rlen;
+            inst->predicate = pred;  /* BRW_PREDICATE_NONE by default */
+
+            return dst;
+         }
+
+         /**
+          * Build the header of an untyped surface message for \p bld.
+          */
+         inline array_reg
+         emit_untyped_message_header(const svec4_builder &bld)
+         {
+            fs_builder ubld = bld.scalar().half(0);
+            const fs_reg dst = ubld.scalar_reg(BRW_REGISTER_TYPE_UD);
+            exec_all(ubld.MOV(dst, fs_reg(0)));  /* zero all of dst first */
+            exec_all(ubld.MOV(channel(dst, 7), ubld.sample_mask_reg()));
+            return array_reg(dst);
+         }
+
+         inline array_reg
+         emit_untyped_message_header(const vec4_builder &bld)
+         {
+            return array_reg();  /* vec4 overload: no MOVs are emitted */
+         }
+      }
+
+      /**
+       * Emit an untyped surface read of \p surface at \p addr under \p pred.
+       * \p dims is the number of components of the address and \p size the
+       * number of components of the returned value.
+       */
+      template<typename B, typename S>
+      S
+      emit_untyped_read(const B &bld, const S &surface, const S &addr,
+                        unsigned dims, unsigned size,
+                        brw_predicate pred = BRW_PREDICATE_NONE)
+      {
+         using namespace detail;
+         const vector_layout layout(bld, true, true);
+         const array_reg payload =
+            emit_collect(bld,
+                         emit_untyped_message_header(bld),
+                         emit_insert(layout, bld, addr, dims));
+         const unsigned rlen = (DIV_ROUND_UP(size, S::traits::chan_size) *
+                                bld.dispatch_width() / 8);
+         const array_reg dst =
+            emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ,
+                      payload, surface, S(size), rlen, pred);
+
+         return emit_extract(layout, bld, &dst, size);  /* single result */
+      }
+
+      /**
+       * Emit an untyped surface write of \p src to \p surface at \p addr.
+       * \p dims is the number of components of the address and \p size the
+       * number of components of the argument.
+       */
+      template<typename B, typename S>
+      void
+      emit_untyped_write(const B &bld, const S &surface, const S &addr, const S &src,
+                         unsigned dims, unsigned size,
+                         brw_predicate pred = BRW_PREDICATE_NONE)
+      {
+         using namespace detail;
+         const vector_layout layout(
+            bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, true);
+         const array_reg payload =
+            emit_collect(bld,
+                         emit_untyped_message_header(bld),
+                         emit_insert(layout, bld, addr, dims),
+                         emit_insert(layout, bld, src, size));
+
+         emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+                   payload, surface, S(size), 0, pred);  /* rlen 0 */
+      }
+
+      /**
+       * Emit an untyped surface atomic \p op.  Sources whose file is BAD_FILE
+       * are not counted as operands.  \p dims is the number of address
+       * components, \p rsize that of the result (either zero or one).
+       */
+      template<typename B, typename S>
+      S
+      emit_untyped_atomic(const B &bld, const S &surface, const S &addr,
+                          const S &src0, const S &src1,
+                          unsigned dims, unsigned rsize, unsigned op,
+                          brw_predicate pred = BRW_PREDICATE_NONE)
+      {
+         using namespace detail;
+         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const vector_layout layout(
+            bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, true);
+         /* Zip the components of both sources: they are represented as the X
+          * and Y components of the same vector.
+          */
+         const S srcs = bld.natural_reg(emit_zip(bld, emit_flatten(bld, src0, 1),
+                                                 emit_flatten(bld, src1, 1), 1));
+         const array_reg payload =
+            emit_collect(bld,
+                         emit_untyped_message_header(bld),
+                         emit_insert(layout, bld, addr, dims),
+                         emit_insert(layout, bld, srcs, size));
+         const array_reg dst =
+            emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC,
+                      payload, surface, S(op),
+                      rsize * bld.dispatch_width() / 8, pred);
+
+         return emit_extract(layout, bld, &dst, rsize);
+      }
+
+      namespace detail {
+         /**
+          * Build the header of a typed surface message for \p bld.
+          */
+         inline array_reg
+         emit_typed_message_header(const svec4_builder &bld)
+         {
+            return emit_untyped_message_header(bld);  /* same as untyped */
+         }
+
+         inline array_reg
+         emit_typed_message_header(const vec4_builder &bld)
+         {
+            const dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+
+            exec_all(bld.MOV(dst, src_reg(0)));  /* zero the header */
+
+            if (bld.devinfo->gen == 7 && !bld.devinfo->is_haswell) {
+               /* The sample mask is used on IVB for the SIMD8 messages that
+                * have no SIMD4x2 variant.  We only use the two X channels
+                * in that case (mask 0x11), so mask everything else out.
+                */
+               exec_all(bld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11)));
+            }
+
+            return array_reg(dst);
+         }
+      }
+
+      /**
+       * Emit a typed surface read, sending one message per layout half.
+       * \p dims is the number of components of the address and \p size the
+       * number of components of the returned value.
+       */
+      template<typename B, typename S>
+      S
+      emit_typed_read(const B &bld, const S &surface, const S &addr,
+                      unsigned dims, unsigned size)
+      {
+         using namespace detail;
+         const vector_layout layout(
+            bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, false);
+         const unsigned rlen = DIV_ROUND_UP(size, S::traits::chan_size);
+         array_reg dsts[2];  /* one per half; assumes halves <= 2 */
+
+         for (unsigned i = 0; i < layout.halves; ++i) {
+            /* Half builder if split; emit_insert() still gets the full bld. */
+            const B ubld = (layout.halves > 1 ? bld.half(i) : bld);
+            const array_reg payload =
+               emit_collect(ubld,
+                            emit_typed_message_header(ubld),
+                            emit_insert(layout, bld, addr, dims, i));
+
+            dsts[i] = emit_send(ubld, SHADER_OPCODE_TYPED_SURFACE_READ,
+                                payload, surface, S(size), rlen);
+         }
+
+         return emit_extract(layout, bld, dsts, size);
+      }
+
+      /**
+       * Emit a typed surface write, sending one message per layout half.
+       * \p dims is the number of components of the address and \p size the
+       * number of components of the argument.
+       */
+      template<typename B, typename S>
+      void
+      emit_typed_write(const B &bld, const S &surface, const S &addr,
+                       const S &src, unsigned dims, unsigned size)
+      {
+         using namespace detail;
+         const vector_layout layout(
+            bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, false);
+
+         for (unsigned i = 0; i < layout.halves; ++i) {
+            /* Half builder if split; emit_insert() still gets the full bld. */
+            const B ubld = (layout.halves > 1 ? bld.half(i) : bld);
+            const array_reg payload =
+               emit_collect(ubld,
+                            emit_typed_message_header(ubld),
+                            emit_insert(layout, bld, addr, dims, i),
+                            emit_insert(layout, bld, src, size, i));
+
+            emit_send(ubld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
+                      payload, surface, S(size), 0);  /* rlen 0 */
+         }
+      }
+
+      /**
+       * Emit a typed atomic \p op, one message per layout half.  \p dims
+       * is the number of components of the address and \p rsize that of
+       * the returned value (either zero or one).
+       */
+      template<typename B, typename S>
+      S
+      emit_typed_atomic(const B &bld, const S &surface, const S &addr,
+                        const S &src0, const S &src1,
+                        unsigned dims, unsigned rsize, unsigned op,
+                        brw_predicate pred = BRW_PREDICATE_NONE)
+      {
+         using namespace detail;
+         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const vector_layout layout(
+            bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, false);
+         /* Zip the components of both sources: they are represented as the X
+          * and Y components of the same vector.
+          */
+         const S srcs = bld.natural_reg(emit_zip(bld, emit_flatten(bld, src0, 1),
+                                                 emit_flatten(bld, src1, 1), 1));
+         array_reg dsts[2];  /* one per half; assumes halves <= 2 */
+
+         for (unsigned i = 0; i < layout.halves; ++i) {
+            /* Half builder if split; emit_insert() still gets the full bld. */
+            const B ubld = (layout.halves > 1 ? bld.half(i) : bld);
+            const array_reg payload =
+               emit_collect(ubld,
+                            emit_typed_message_header(ubld),
+                            emit_insert(layout, bld, addr, dims, i),
+                            emit_insert(layout, bld, srcs, size, i));
+
+            dsts[i] = emit_send(ubld, SHADER_OPCODE_TYPED_ATOMIC,
+                                payload, surface, S(op), rsize, pred);
+         }
+
+         return emit_extract(layout, bld, dsts, rsize);
+      }
+   }
 }
 
 #endif
-- 
2.3.5