[Beignet] [V2 PATCH 5/7] Backend: Handle the bswap using indirect mode access.

Zhigang Gong zhigang.gong at linux.intel.com
Sun Mar 8 21:57:41 PDT 2015


On Mon, Mar 09, 2015 at 01:55:06PM +0800, He Junyan wrote:
> 
> On 2015年03月09日 09:11, Zhigang Gong wrote:
> >On Fri, Mar 06, 2015 at 03:24:00PM +0800, junyan.he at inbox.com wrote:
> >>From: Junyan He <junyan.he at linux.intel.com>
> >>
> >>The swap for short will be like:
> >>mov(1)   a0<1>:UD        0xe600e61UD            { align1 WE_all };
> >>mov(1)   a0.1<1>:UD      0xe620e63UD            { align1 WE_all };
> >>mov(1)   a0.2<1>:UD      0xe640e65UD            { align1 WE_all };
> >>mov(1)   a0.3<1>:UD      0xe660e67UD            { align1 WE_all };
> >>mov(8)   g114<1>:UB      g[a0]<VxH,1,0>:UB      { align1 WE_all 1Q };
> >>mov(8)   g114.8<1>:UB    g[a0 8]<VxH,1,0>:UB    { align1 WE_all 1Q };
> >>mov(8)   g114.16<1>:UB   g[a0 16]<VxH,1,0>:UB   { align1 WE_all 1Q };
> >>mov(8)   g114.24<1>:UB   g[a0 24]<VxH,1,0>:UB   { align1 WE_all 1Q };
> >>mov(16)  g113<1>:UW      g114<8,8,1>:UW         { align1 WE_normal 1H };
> >>
> >>Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> >>---
> >>  backend/src/backend/gen_context.cpp        |  112 ++++++++++++++++++++++++++++
> >>  backend/src/backend/gen_insn_selection.cpp |    9 +++
> >>  backend/src/backend/gen_insn_selection.hxx |    1 +
> >>  3 files changed, 122 insertions(+)
> >>
> >>diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> >>index 6856510..46b4a06 100644
> >>--- a/backend/src/backend/gen_context.cpp
> >>+++ b/backend/src/backend/gen_context.cpp
> >>@@ -297,6 +297,118 @@ namespace gbe
> >>            p->MOV(dst.top_half(this->simdWidth), GenRegister::immud(0));
> >>          break;
> >>        }
> >>+      case SEL_OP_BSWAP: {
> >>+        uint32_t simd = p->curr.execWidth;
> >>+        GBE_ASSERT(simd == 8 || simd == 16 || simd == 1);
> >>+        uint16_t new_a0[16];
> >>+        memset(new_a0, 0, sizeof(new_a0));
> >>+
> >>+        GBE_ASSERT(src.type == dst.type);
> >>+        uint32_t start_addr = src.nr*32 + src.subnr;
> >>+
> >>+        if (simd == 1) {
> >>+          GBE_ASSERT(src.hstride == GEN_HORIZONTAL_STRIDE_0
> >>+              && dst.hstride == GEN_HORIZONTAL_STRIDE_0);
> >>+          if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D) {
> >>+            GBE_ASSERT(start_addr >= 0);
> >>+            new_a0[0] = start_addr + 3;
> >>+            new_a0[1] = start_addr + 2;
> >>+            new_a0[2] = start_addr + 1;
> >>+            new_a0[3] = start_addr;
> >>+            this->setA0Content(new_a0, 0, 4);
> >>+
> >>+            p->push();
> >>+            p->curr.execWidth = 4;
> >>+            p->curr.predicate = GEN_PREDICATE_NONE;
> >>+            p->curr.noMask = 1;
> >>+            GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> >>+                a0[0], new_a0[0] - a0[0]);
> >>+            GenRegister dst_ = dst;
> >>+            dst_.type = GEN_TYPE_UB;
> >>+            dst_.hstride = GEN_HORIZONTAL_STRIDE_1;
> >>+            dst_.width = GEN_WIDTH_4;
> >>+            dst_.vstride = GEN_VERTICAL_STRIDE_4;
> >>+            p->MOV(dst_, ind_src);
> >>+            p->pop();
> >>+          } else if (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W) {
> >>+            p->MOV(GenRegister::retype(dst, GEN_TYPE_UB),
> >>+                GenRegister::retype(GenRegister::offset(src, 0, 1), GEN_TYPE_UB));
> >>+            p->MOV(GenRegister::retype(GenRegister::offset(dst, 0, 1), GEN_TYPE_UB),
> >>+                GenRegister::retype(src, GEN_TYPE_UB));
> >>+          } else {
> >>+            GBE_ASSERT(0);
> >>+          }
> >>+        } else {
> >>+          if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D) {
> >>+            GBE_ASSERT(src.subnr == 0);
> >The above assertion is not correct, because a valid simd8 or simd16 BSWAP instruction may have a
> >uniform source register. We can't assume the source register is never a uniform value.
> I think the uniform case will be handled in the if (simd == 1) case
> just above.
> I find that if src is uniform, the dst always seems to be uniform as well, and the
> simd will be 1 here.
This is not true. If the src is uniform but the dst is defined in multiple places due
to a phi instruction, the dst will not be identified as a uniform value.

> >>+            GBE_ASSERT(dst.subnr == 0);
> >>+            GBE_ASSERT(tmp.subnr == 0);
> >>+            GBE_ASSERT(start_addr >= 0);
> >>+            new_a0[0] = start_addr + 3;
> >>+            new_a0[1] = start_addr + 2;
> >>+            new_a0[2] = start_addr + 1;
> >>+            new_a0[3] = start_addr;
> >>+            new_a0[4] = start_addr + 7;
> >>+            new_a0[5] = start_addr + 6;
> >>+            new_a0[6] = start_addr + 5;
> >>+            new_a0[7] = start_addr + 4;
> >>+            this->setA0Content(new_a0, 56);
> >>+
> >>+            p->push();
> >>+            p->curr.execWidth = 8;
> >>+            p->curr.predicate = GEN_PREDICATE_NONE;
> >>+            p->curr.noMask = 1;
> >>+            GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> >>+                a0[0], new_a0[0] - a0[0]);
> >>+            p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
> >>+            for (int i = 1; i < 4; i++) {
> >>+              ind_src.addr_imm += 8;
> >>+              p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 8*i), ind_src);
> >>+            }
> >>+            if (simd == 16) {
> >>+              for (int i = 0; i < 4; i++) {
> >>+                ind_src.addr_imm += 8;
> >>+                p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 1, 8*i), ind_src);
> >>+              }
> >>+            }
> >>+            p->pop();
> >>+
> >>+            p->MOV(dst, tmp);
> >>+          } else if (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W) {
> >>+            GBE_ASSERT(src.subnr == 0 || src.subnr == 16);
> >>+            GBE_ASSERT(dst.subnr == 0 || dst.subnr == 16);
> >>+            GBE_ASSERT(tmp.subnr == 0 || tmp.subnr == 16);
> >>+            GBE_ASSERT(start_addr >= 0);
> >>+            new_a0[0] = start_addr + 1;
> >>+            new_a0[1] = start_addr;
> >>+            new_a0[2] = start_addr + 3;
> >>+            new_a0[3] = start_addr + 2;
> >>+            new_a0[4] = start_addr + 5;
> >>+            new_a0[5] = start_addr + 4;
> >>+            new_a0[6] = start_addr + 7;
> >>+            new_a0[7] = start_addr + 6;
> >>+            this->setA0Content(new_a0, 56);
> >>+
> >>+            p->push();
> >>+            p->curr.execWidth = 8;
> >>+            p->curr.predicate = GEN_PREDICATE_NONE;
> >>+            p->curr.noMask = 1;
> >>+            GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> >>+                a0[0], new_a0[0] - a0[0]);
> >>+            p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
> >>+            for (int i = 1; i < (simd == 8 ? 2 : 4); i++) {
> >>+              ind_src.addr_imm += 8;
> >>+              p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 8*i), ind_src);
> >>+            }
> >>+            p->pop();
> >>+
> >>+            p->MOV(dst, tmp);
> >>+          } else {
> >>+            GBE_ASSERT(0);
> >>+          }
> >>+        }
> >>+      }
> >>+      break;
> >>        default:
> >>          NOT_IMPLEMENTED;
> >>      }
> >>diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> >>index d100f80..2b166b1 100644
> >>--- a/backend/src/backend/gen_insn_selection.cpp
> >>+++ b/backend/src/backend/gen_insn_selection.cpp
> >>@@ -498,6 +498,7 @@ namespace gbe
> >>      ALU1(RNDE)
> >>      ALU1(F16TO32)
> >>      ALU1(F32TO16)
> >>+    ALU1WithTemp(BSWAP)
> >>      ALU2(SEL)
> >>      ALU2(SEL_INT64)
> >>      ALU1(NOT)
> >>@@ -2121,6 +2122,14 @@ namespace gbe
> >>            case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break;
> >>            case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break;
> >>            case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break;
> >>+          case ir::OP_BSWAP:
> >>+            {
> >>+              ir::Register tmp = sel.reg(getFamily(insnType));
> >>+              const GenRegister src_ = GenRegister::retype(src, getGenType(insnType));
> >>+              const GenRegister dst_ = GenRegister::retype(dst, getGenType(insnType));
> >>+              sel.BSWAP(dst_, src_, sel.selReg(tmp, insnType));
> >>+              break;
> >>+            }
> >>            case ir::OP_SIMD_ANY:
> >>              {
> >>                const GenRegister constZero = GenRegister::immuw(0);;
> >>diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> >>index be1f7ec..09f5aaf 100644
> >>--- a/backend/src/backend/gen_insn_selection.hxx
> >>+++ b/backend/src/backend/gen_insn_selection.hxx
> >>@@ -1,5 +1,6 @@
> >>  DECL_SELECTION_IR(LABEL, LabelInstruction)
> >>  DECL_SELECTION_IR(MOV, UnaryInstruction)
> >>+DECL_SELECTION_IR(BSWAP, UnaryWithTempInstruction)
> >>  DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction)
> >>  DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction)
> >>  DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction)
> >>-- 
> >>1.7.9.5
> >>
> >>_______________________________________________
> >>Beignet mailing list
> >>Beignet at lists.freedesktop.org
> >>http://lists.freedesktop.org/mailman/listinfo/beignet
> >_______________________________________________
> >Beignet mailing list
> >Beignet at lists.freedesktop.org
> >http://lists.freedesktop.org/mailman/listinfo/beignet
> 
> 
> 


More information about the Beignet mailing list