[Beignet] [PATCH V2 3/6] Change constant buffer load to an indirect-address (ia) move.

Yang Rong rong.r.yang at intel.com
Thu Apr 18 00:18:04 PDT 2013


The addrIMM still needs to be patched after the curbe has been allocated.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 backend/src/backend/gen_context.cpp                |   33 +++++++++++++++-
 backend/src/backend/gen_context.hpp                |    6 ++-
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |    1 +
 backend/src/backend/gen_insn_selection.cpp         |   40 +++++++++++++++++---
 backend/src/backend/gen_insn_selection.hxx         |    1 +
 backend/src/backend/gen_register.hpp               |   10 +++++
 6 files changed, 82 insertions(+), 9 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index b4c9a65..54c6d77 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -144,7 +144,7 @@ namespace gbe
     }
   }
 
-  void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) { 
+  void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
     const GenRegister src1 = ra->genReg(insn.src(1));
@@ -219,6 +219,37 @@ namespace gbe
     p->JMPI(src);
   }
 
+  void GenContext::emitCBMoveInstruction(const SelectionInstruction &insn) {
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    const GenRegister src0 = ra->genReg(insn.src(0));
+    const GenRegister src1 = ra->genReg(insn.src(1));
+    const GenRegister src2 = ra->genReg(insn.src(2));
+    const GenRegister a0 = GenRegister::addr8(0);
+
+    p->ADD(src0, src1, GenRegister::negate(src2));
+
+    this->cbMovePos.push_back(std::make_pair(ir::Register(insn.src(2).value.reg), p->store.size()));
+    p->push();
+      p->curr.execWidth = 8;
+      p->MOV(a0, GenRegister::unpacked_uw(src0.nr, 0));
+      //p->ADD(a0, a0, GenRegister::immuw(0));
+      p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
+    p->pop();
+
+    if (simdWidth == 16) {
+      const GenRegister dst1 = GenRegister::Qn(dst, 1);
+      const GenRegister src0_1 = GenRegister::Qn(GenRegister::unpacked_uw(src0.nr, 0), 1);
+      this->cbMovePos.push_back(std::make_pair(ir::Register(insn.src(2).value.reg), p->store.size()));
+      p->push();
+        p->curr.execWidth = 8;
+        p->curr.quarterControl = GEN_COMPRESSION_Q2;
+        p->MOV(a0, src0_1);
+        //p->ADD(a0, a0, GenRegister::immuw(0));
+        p->MOV(dst1, GenRegister::indirect(dst1.type, 0, GEN_WIDTH_8));
+      p->pop();
+      }
+  }
+
   void GenContext::emitEotInstruction(const SelectionInstruction &insn) {
     p->push();
       p->curr.predicate = GEN_PREDICATE_NONE;
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6af174f..8aabcd8 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -80,6 +80,7 @@ namespace gbe
     void emitTernaryInstruction(const SelectionInstruction &insn);
     void emitCompareInstruction(const SelectionInstruction &insn);
     void emitJumpInstruction(const SelectionInstruction &insn);
+    void emitCBMoveInstruction(const SelectionInstruction &insn);
     void emitEotInstruction(const SelectionInstruction &insn);
     void emitNoOpInstruction(const SelectionInstruction &insn);
     void emitWaitInstruction(const SelectionInstruction &insn);
@@ -91,13 +92,14 @@ namespace gbe
     void emitByteScatterInstruction(const SelectionInstruction &insn);
     void emitSampleInstruction(const SelectionInstruction &insn);
     void emitTypedWriteInstruction(const SelectionInstruction &insn);
-
     /*! Implements base class */
     virtual Kernel *allocateKernel(void);
     /*! Store the position of each label instruction in the Gen ISA stream */
     map<ir::LabelIndex, uint32_t> labelPos;
     /*! Store the Gen instructions to patch */
     vector<std::pair<ir::LabelIndex, uint32_t>> branchPos2;
+    /*! Store the constant buffer ia mov instructions to patch */
+    vector<std::pair<ir::Register, uint32_t>> cbMovePos;
     /*! Encode Gen ISA */
     GenEncoder *p;
     /*! Instruction selection on Gen ISA (pre-register allocation) */
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 969ec82..19ebaa6 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -4,6 +4,7 @@ DECL_GEN7_SCHEDULE(Unary,           20,        4,        2)
 DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)
 DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
 DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
+DECL_GEN7_SCHEDULE(CBMove,          40,        2,        2)
 DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
 DECL_GEN7_SCHEDULE(Eot,             20,        1,        1)
 DECL_GEN7_SCHEDULE(NoOp,            20,        2,        2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e0e8920..67726be 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -25,7 +25,7 @@
 /* This is the instruction selection code. First of all, this is a bunch of c++
  * crap. Sorry if this is not that readable. Anyway, the goal here is to take
  * GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
- * virtual registers (i.e. regular GenIR registers). 
+ * virtual registers (i.e. regular GenIR registers).
  *
  * Overall idea:
  * =============
@@ -72,7 +72,7 @@
  * *same* flag register for the predicates (used for masking) and the
  * conditional modifier (used as a destination for CMP). This leads to extra
  * complications with compare instructions and select instructions. Basically,
- * we need to insert extra MOVs. 
+ * we need to insert extra MOVs.
  *
  * Also, there is some extra kludge to handle the predicates for JMPI.
  *
@@ -435,6 +435,8 @@ namespace gbe
     void LABEL(ir::LabelIndex label);
     /*! Jump indexed instruction */
     void JMPI(Reg src, ir::LabelIndex target);
+    /*!constant buffer mov instruction*/
+    void CB_MOVE(Reg dst, Reg src0, Reg src1, Reg src2);
     /*! Compare instructions */
     void CMP(uint32_t conditional, Reg src0, Reg src1);
     /*! Select instruction with embedded comparison */
@@ -481,7 +483,7 @@ namespace gbe
   static void markAllChildren(SelectionDAG &dag) {
     // Do not merge anything, so all sources become roots
     for (uint32_t childID = 0; childID < dag.childNum; ++childID)
-      if (dag.child[childID]) 
+      if (dag.child[childID])
         dag.child[childID]->isRoot = 1;
   }
 
@@ -684,6 +686,14 @@ namespace gbe
     insn->index = uint16_t(index);
   }
 
+  void Selection::Opaque::CB_MOVE(Reg dst, Reg src0, Reg src1, Reg src2) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_CB_MOVE, 1, 3);
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+    insn->src(2) = src2;
+    insn->dst(0) = dst;
+  }
+
   void Selection::Opaque::CMP(uint32_t conditional, Reg src0, Reg src1) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_CMP, 0, 2);
     insn->src(0) = src0;
@@ -1057,7 +1067,7 @@ namespace gbe
   // Implementation of all patterns
   ///////////////////////////////////////////////////////////////////////////
 
-  GenRegister getRegisterFromImmediate(ir::Immediate imm) 
+  GenRegister getRegisterFromImmediate(ir::Immediate imm)
   {
     using namespace ir;
     switch (imm.type) {
@@ -1614,6 +1624,21 @@ namespace gbe
   /*! Load instruction pattern */
   DECL_PATTERN(LoadInstruction)
   {
+    void emitCBMove(Selection::Opaque &sel, const ir::LoadInstruction &insn, GenRegister addr)  const
+    {
+      using namespace ir;
+      GBE_ASSERT(insn.getValueNum() == 1);   //todo: handle vec later
+
+      GenRegister offset = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+      const GenRegister baseReg = sel.selReg(sel.ctx.getConstBaseReg(Register(addr.value.reg)), TYPE_U32);
+      //const uint32_t valueNum = insn.getValueNum();
+      //GenRegister dst[valueNum];
+      //for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
+      const GenRegister dst = sel.selReg(insn.getValue(0), insn.getValueType());
+
+      sel.CB_MOVE(dst, offset, GenRegister::retype(addr, GEN_TYPE_UD), baseReg);
+    }
+
     void emitUntypedRead(Selection::Opaque &sel,
                          const ir::LoadInstruction &insn,
                          GenRegister addr,
@@ -1659,10 +1684,13 @@ namespace gbe
       const GenRegister address = sel.selReg(insn.getAddress());
       const AddressSpace space = insn.getAddressSpace();
       GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
+                 insn.getAddressSpace() == MEM_CONSTANT ||
                  insn.getAddressSpace() == MEM_PRIVATE ||
                  insn.getAddressSpace() == MEM_LOCAL);
       GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
-      if (insn.isAligned() == true)
+      if(insn.getAddressSpace() == MEM_CONSTANT)
+        this->emitCBMove(sel, insn, address);
+      else if (insn.isAligned() == true)
         this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
       else {
         const GenRegister value = sel.selReg(insn.getValue(0));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 691100b..f89ad4c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -24,6 +24,7 @@ DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
 DECL_SELECTION_IR(MAD, TernaryInstruction)
 DECL_SELECTION_IR(JMPI, JumpInstruction)
 DECL_SELECTION_IR(EOT, EotInstruction)
+DECL_SELECTION_IR(CB_MOVE, CBMoveInstruction)
 DECL_SELECTION_IR(NOP, NoOpInstruction)
 DECL_SELECTION_IR(WAIT, WaitInstruction)
 DECL_SELECTION_IR(MATH, MathInstruction)
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 92122a6..d772b0d 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -725,6 +725,16 @@ namespace gbe
       return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
     }
 
+    static INLINE GenRegister unpacked_uw(uint32_t nr, uint32_t subnr) {
+      return GenRegister(GEN_GENERAL_REGISTER_FILE,
+                         nr,
+                         subnr,
+                         GEN_TYPE_UW,
+                         GEN_VERTICAL_STRIDE_16,
+                         GEN_WIDTH_8,
+                         GEN_HORIZONTAL_STRIDE_2);
+    }
+
     static INLINE GenRegister mask(uint32_t subnr) {
       return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
     }
-- 
1.7.9.5



More information about the Beignet mailing list