[Mesa-dev] [PATCH 1/7] mesa/st: add lowering pass for YUV samplers

Rob Clark robdclark at gmail.com
Thu Sep 8 20:30:34 UTC 2016


Signed-off-by: Rob Clark <robdclark at gmail.com>
---
 src/mesa/Makefile.sources                  |   2 +
 src/mesa/state_tracker/st_tgsi_lower_yuv.c | 447 +++++++++++++++++++++++++++++
 src/mesa/state_tracker/st_tgsi_lower_yuv.h |  34 +++
 3 files changed, 483 insertions(+)
 create mode 100644 src/mesa/state_tracker/st_tgsi_lower_yuv.c
 create mode 100644 src/mesa/state_tracker/st_tgsi_lower_yuv.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 363b133..653d615 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -513,6 +513,8 @@ STATETRACKER_FILES = \
 	state_tracker/st_scissor.h \
 	state_tracker/st_texture.c \
 	state_tracker/st_texture.h \
+	state_tracker/st_tgsi_lower_yuv.c \
+	state_tracker/st_tgsi_lower_yuv.h \
 	state_tracker/st_vdpau.c \
 	state_tracker/st_vdpau.h
 
diff --git a/src/mesa/state_tracker/st_tgsi_lower_yuv.c b/src/mesa/state_tracker/st_tgsi_lower_yuv.c
new file mode 100644
index 0000000..e346b97
--- /dev/null
+++ b/src/mesa/state_tracker/st_tgsi_lower_yuv.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright © 2016 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include "st_tgsi_lower_yuv.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_debug.h"
+
+#include "util/bitscan.h"
+
+struct tgsi_yuv_transform {
+   struct tgsi_transform_context base;  /* must be first: see tgsi_yuv_transform() */
+   struct tgsi_shader_info info;        /* scan of the input shader */
+   struct tgsi_full_src_register imm[4];  /* CSC immediates, see emit_decls() */
+   struct {
+      struct tgsi_full_src_register src;
+      struct tgsi_full_dst_register dst;
+   } tmp[2];                            /* scratch temps, indexed by A / B */
+#define A 0
+#define B 1
+
+   /* Maps a primary sampler (used for Y) to its extra plane sampler(s):
+    * [0] is the U or interleaved UV sampler, [1] the V sampler (3-plane only).
+    */
+   unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
+
+   bool first_instruction_emitted;      /* set once emit_decls() has run */
+   unsigned free_slots;                 /* bitmask of still-unclaimed sampler slots */
+   unsigned lower_nv12;                 /* bitmask of samplers to lower as NV12 */
+   unsigned lower_iyuv;                 /* bitmask of samplers to lower as IYUV */
+};
+
+static inline struct tgsi_yuv_transform *
+tgsi_yuv_transform(struct tgsi_transform_context *tctx)
+{
+   return (struct tgsi_yuv_transform *)tctx;  /* base is the first member */
+}
+
+static void
+reg_dst(struct tgsi_full_dst_register *dst,
+        const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
+{
+   *dst = *orig_dst;                   /* start from a copy of the original */
+   dst->Register.WriteMask &= wrmask;  /* keep only the requested channels */
+   assert(dst->Register.WriteMask);    /* at least one channel must remain */
+}
+
+static inline void
+get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
+{
+   swiz[0] = src->SwizzleX;  /* swiz[] must have room for four entries */
+   swiz[1] = src->SwizzleY;
+   swiz[2] = src->SwizzleZ;
+   swiz[3] = src->SwizzleW;
+}
+
+static void
+reg_src(struct tgsi_full_src_register *src,
+        const struct tgsi_full_src_register *orig_src,
+        unsigned sx, unsigned sy, unsigned sz, unsigned sw)
+{
+   unsigned swiz[4];
+   get_swiz(swiz, &orig_src->Register);  /* capture before *src is written */
+   *src = *orig_src;
+   src->Register.SwizzleX = swiz[sx];    /* compose: new.x = orig.<sx>, ... */
+   src->Register.SwizzleY = swiz[sy];
+   src->Register.SwizzleZ = swiz[sz];
+   src->Register.SwizzleW = swiz[sw];
+}
+
+#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* '_' in SWIZ(): don't-care value! */
+#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
+      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
+
+/* Template "TEX ?, ?, SAMP[samp], 2D"; caller fills in Dst[0] and Src[0]. */
+static inline struct tgsi_full_instruction
+tex_instruction(unsigned samp)
+{
+   struct tgsi_full_instruction tex = tgsi_default_full_instruction();
+
+   tex.Instruction.Opcode = TGSI_OPCODE_TEX;
+   tex.Instruction.NumDstRegs = 1;
+   tex.Instruction.NumSrcRegs = 2;
+   tex.Instruction.Texture = 1;
+   tex.Texture.Texture = TGSI_TEXTURE_2D;
+   tex.Src[1].Register.File  = TGSI_FILE_SAMPLER;
+   tex.Src[1].Register.Index = samp;
+
+   return tex;
+}
+
+/* Template for a one-source, unsaturated MOV; caller fills in Dst[0]/Src[0]. */
+static inline struct tgsi_full_instruction
+mov_instruction(void)
+{
+   struct tgsi_full_instruction mov = tgsi_default_full_instruction();
+
+   mov.Instruction.Opcode = TGSI_OPCODE_MOV;
+   mov.Instruction.NumSrcRegs = 1;
+   mov.Instruction.NumDstRegs = 1;
+   mov.Instruction.Saturate = 0;
+
+   return mov;
+}
+
+/* Template for a two-source DP3; caller fills in Dst[0] and Src[0..1]. */
+static inline struct tgsi_full_instruction
+dp3_instruction(void)
+{
+   struct tgsi_full_instruction dp3 = tgsi_default_full_instruction();
+
+   dp3.Instruction.Opcode = TGSI_OPCODE_DP3;
+   dp3.Instruction.NumSrcRegs = 2;
+   dp3.Instruction.NumDstRegs = 1;
+
+   return dp3;
+}
+
+
+
+static void
+emit_immed(struct tgsi_transform_context *tctx, int idx,
+           float x, float y, float z, float w)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_shader_info *info = &ctx->info;
+   struct tgsi_full_immediate immed;
+
+   immed = tgsi_default_full_immediate();
+   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself, four values */
+   immed.u[0].Float = x;
+   immed.u[1].Float = y;
+   immed.u[2].Float = z;
+   immed.u[3].Float = w;
+   tctx->emit_immediate(tctx, &immed);
+
+   ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;  /* src reg to read it back */
+   ctx->imm[idx].Register.Index = info->immediate_count + idx;  /* NOTE: assumes calls in idx order */
+   ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
+   ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
+   ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+   ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
+}
+
+static void
+emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
+{
+   tgsi_transform_sampler_decl(tctx, samp);  /* sampler + 2D float view */
+   tgsi_transform_sampler_view_decl(tctx, samp, TGSI_TEXTURE_2D, /* not PIPE_ */
+                                    TGSI_RETURN_TYPE_FLOAT);
+}
+
+/* Emit extra declarations we need:
+ *  + 2 TEMP to hold intermediate results
+ *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
+ *    lowered YUV sampler, taken from ctx->free_slots
+ *  + extra immediates for doing CSC
+ */
+static void
+emit_decls(struct tgsi_transform_context *tctx)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_shader_info *info = &ctx->info;
+   unsigned mask, tempbase, i;
+   struct tgsi_full_declaration decl;
+
+   /*
+    * Declare immediates for CSC conversion:
+    */
+
+   /* ITU-R BT.601 conversion */
+   emit_immed(tctx, 0, 1.164,  0.000,  1.596,  0.0);
+   emit_immed(tctx, 1, 1.164, -0.392, -0.813,  0.0);
+   emit_immed(tctx, 2, 1.164,  2.017,  0.000,  0.0);
+   emit_immed(tctx, 3, 0.0625, 0.500,  0.500,  1.0);
+
+   /*
+    * Declare extra samplers / sampler-views:
+    */
+
+   mask = ctx->lower_nv12 | ctx->lower_iyuv;
+   while (mask) {
+      unsigned extra, y_samp = u_bit_scan(&mask);
+      /* one extra plane (UV) for NV12, two (U and V) for IYUV: */
+      unsigned nplanes = (ctx->lower_iyuv & (1u << y_samp)) ? 2 : 1;
+
+      for (i = 0; i < nplanes; i++) {
+         /* running out of free sampler slots is a caller bug: */
+         assert(ctx->free_slots);
+         extra = u_bit_scan(&ctx->free_slots);
+         ctx->sampler_map[y_samp][i] = extra;
+         emit_samp(tctx, extra);
+      }
+   }
+
+   /*
+    * Declare extra temp:
+    */
+
+   tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
+
+   for (i = 0; i < 2; i++) {
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.Range.First = decl.Range.Last = tempbase + i;
+      tctx->emit_declaration(tctx, &decl);
+
+      ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
+      ctx->tmp[i].src.Register.Index = tempbase + i;
+      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
+      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
+      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
+
+      ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
+      ctx->tmp[i].dst.Register.Index = tempbase + i;
+      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+   }
+}
+
+/* Convert the YUV value held in tmpA.xyz to RGB and write it to dst.
+ *
+ * Channels masked out of dst are skipped: reg_dst() asserts on an empty
+ * writemask, so a TEX writing only .xyz must not get a W write emitted.
+ */
+static void
+yuv_to_rgb(struct tgsi_transform_context *tctx,
+           struct tgsi_full_dst_register *dst)
+{
+   static const unsigned rgb_mask[3] = {
+      TGSI_WRITEMASK_X, TGSI_WRITEMASK_Y, TGSI_WRITEMASK_Z
+   };
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   struct tgsi_full_instruction inst;
+   unsigned chan;
+
+   /*
+    * IMM[0] FLT32 { 1.164,  0.000,  1.596,  0.0 }
+    * IMM[1] FLT32 { 1.164, -0.392, -0.813,  0.0 }
+    * IMM[2] FLT32 { 1.164,  2.017,  0.000,  0.0 }
+    * IMM[3] FLT32 { 0.0625, 0.500,  0.500,  1.0 }
+    */
+
+   /* SUB tmpA.xyz, tmpA, imm[3] */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+   inst.Instruction.Saturate = 0;
+   inst.Instruction.NumDstRegs = 1;
+   inst.Instruction.NumSrcRegs = 2;
+   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
+   reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
+   reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
+   tctx->emit_instruction(tctx, &inst);
+
+   /* DP3 dst.{x,y,z}, tmpA, imm[{0,1,2}] -- one row of the matrix each */
+   for (chan = 0; chan < 3; chan++) {
+      if (!(dst->Register.WriteMask & rgb_mask[chan]))
+         continue;
+      inst = dp3_instruction();
+      reg_dst(&inst.Dst[0], dst, rgb_mask[chan]);
+      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+      reg_src(&inst.Src[1], &ctx->imm[chan], SWIZ(X, Y, Z, W));
+      tctx->emit_instruction(tctx, &inst);
+   }
+
+   /* MOV dst.w, imm[3].w -- alpha is the constant 1.0 */
+   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
+      inst = mov_instruction();
+      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
+      reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
+      tctx->emit_instruction(tctx, &inst);
+   }
+}
+
+/* Replace a TEX on an NV12 sampler with a Y fetch, a UV fetch from the
+ * mapped extra sampler, and the common YUV->RGB conversion.
+ */
+static void
+lower_nv12(struct tgsi_transform_context *tctx,
+           struct tgsi_full_instruction *originst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   const unsigned y_samp = originst->Src[1].Register.Index;
+   const unsigned uv_samp = ctx->sampler_map[y_samp][0];
+   struct tgsi_full_src_register *texcoord = &originst->Src[0];
+   struct tgsi_full_instruction tex, mov;
+
+   /* TEX tempA.x, coord, texture[samp], 2D -- Y plane */
+   tex = tex_instruction(y_samp);
+   reg_dst(&tex.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+   reg_src(&tex.Src[0], texcoord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &tex);
+
+   /* TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D -- UV plane */
+   tex = tex_instruction(uv_samp);
+   reg_dst(&tex.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
+   reg_src(&tex.Src[0], texcoord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &tex);
+
+   /* MOV tempA.yz, tempB._xy_ -- pack U and V next to Y */
+   mov = mov_instruction();
+   reg_dst(&mov.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
+   reg_src(&mov.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
+   tctx->emit_instruction(tctx, &mov);
+
+   /* tempA.xyz now holds YUV; convert and write the original destination */
+   yuv_to_rgb(tctx, &originst->Dst[0]);
+}
+
+/* Replace a TEX on a 3-plane (IYUV) sampler: fetch Y, U and V from their
+ * own samplers into tempA.xyz (U and V go through tempB), then run the
+ * common YUV->RGB conversion.
+ */
+static void
+lower_iyuv(struct tgsi_transform_context *tctx,
+           struct tgsi_full_instruction *originst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+   const unsigned y_samp = originst->Src[1].Register.Index;
+   const unsigned u_samp = ctx->sampler_map[y_samp][0];
+   const unsigned v_samp = ctx->sampler_map[y_samp][1];
+   struct tgsi_full_src_register *texcoord = &originst->Src[0];
+   struct tgsi_full_instruction tex, mov;
+
+   /* TEX tempA.x, coord, texture[samp], 2D -- Y plane */
+   tex = tex_instruction(y_samp);
+   reg_dst(&tex.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+   reg_src(&tex.Src[0], texcoord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &tex);
+
+   /* TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D -- U plane */
+   tex = tex_instruction(u_samp);
+   reg_dst(&tex.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
+   reg_src(&tex.Src[0], texcoord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &tex);
+
+   /* MOV tempA.y, tempB._x__ */
+   mov = mov_instruction();
+   reg_dst(&mov.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
+   reg_src(&mov.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
+   tctx->emit_instruction(tctx, &mov);
+
+   /* TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D -- V plane */
+   tex = tex_instruction(v_samp);
+   reg_dst(&tex.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
+   reg_src(&tex.Src[0], texcoord, SWIZ(X, Y, Z, W));
+   tctx->emit_instruction(tctx, &tex);
+
+   /* MOV tempA.z, tempB.__x_ */
+   mov = mov_instruction();
+   reg_dst(&mov.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
+   reg_src(&mov.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
+   tctx->emit_instruction(tctx, &mov);
+
+   /* tempA.xyz now holds YUV; convert and write the original destination */
+   yuv_to_rgb(tctx, &originst->Dst[0]);
+}
+
+/* Instruction callback: emit the extra declarations before the first
+ * instruction, expand TEX on samplers flagged for lowering, copy the rest.
+ */
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+                struct tgsi_full_instruction *inst)
+{
+   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
+
+   if (!ctx->first_instruction_emitted) {
+      emit_decls(tctx);
+      ctx->first_instruction_emitted = true;
+   }
+
+   /* TODO what other tex opcode's can be used w/ external eglimgs? */
+   if (inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
+      /* unsigned shift: "1 << 31" would overflow a signed int */
+      unsigned samp_bit = 1u << inst->Src[1].Register.Index;
+
+      if (ctx->lower_nv12 & samp_bit) {
+         lower_nv12(tctx, inst);
+         return;
+      } else if (ctx->lower_iyuv & samp_bit) {
+         lower_iyuv(tctx, inst);
+         return;
+      }
+   }
+
+   tctx->emit_instruction(tctx, inst);
+}
+
+/* Lower TEX on the samplers flagged in lower_nv12 / lower_iyuv, claiming
+ * extra sampler slots from the free_slots bitmask.  Returns the new token
+ * stream, or NULL if it could not be allocated.
+ */
+extern const struct tgsi_token *
+st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
+                  unsigned lower_nv12, unsigned lower_iyuv)
+{
+   struct tgsi_yuv_transform ctx;
+   struct tgsi_token *newtoks;
+   int newlen;
+
+   assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
+
+   memset(&ctx, 0, sizeof(ctx));
+   ctx.base.transform_instruction = transform_instr;
+   ctx.free_slots = free_slots;
+   ctx.lower_nv12 = lower_nv12;
+   ctx.lower_iyuv = lower_iyuv;
+   tgsi_scan_shader(tokens, &ctx.info);
+
+   /* TODO better job of figuring out how many extra tokens we need..
+    * this is a pain about tgsi_transform :-/  For now: 120 for the extra
+    * declarations plus a rough bound of 64 per TEX (up to 10 instructions).
+    */
+   newlen = tgsi_num_tokens(tokens) + 120 +
+            64 * ctx.info.opcode_count[TGSI_OPCODE_TEX];
+   newtoks = tgsi_alloc_tokens(newlen);
+   if (!newtoks)
+      return NULL;
+
+   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
+
+   return newtoks;
+}
diff --git a/src/mesa/state_tracker/st_tgsi_lower_yuv.h b/src/mesa/state_tracker/st_tgsi_lower_yuv.h
new file mode 100644
index 0000000..c46423b
--- /dev/null
+++ b/src/mesa/state_tracker/st_tgsi_lower_yuv.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright © 2016 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ST_TGSI_LOWER_YUV_H
+#define ST_TGSI_LOWER_YUV_H
+
+struct tgsi_token;
+
+extern const struct tgsi_token * st_tgsi_lower_yuv(const struct tgsi_token *tokens,
+                                                   unsigned free_slots,  /* bitmask */
+                                                   unsigned lower_nv12,  /* bitmask */
+                                                   unsigned lower_iyuv); /* bitmask */
+
+#endif /* ST_TGSI_LOWER_YUV_H */
-- 
2.7.4



More information about the mesa-dev mailing list