[Beignet] [PATCH 2/2] Backend: Refine image block read with less vector and dst tmp

Xiuli Pan xiuli.pan at intel.com
Mon Aug 22 06:03:04 UTC 2016


From: Pan Xiuli <xiuli.pan at intel.com>

Image block read in simd16 mode needs to be split into 2 simd8 send messages;
refine the code so that both messages share the same tmp vector regs.
Also fix some wrong tmp regs in simd8 mode.

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/backend/gen_context.cpp        | 30 ++++++++++++-----------------
 backend/src/backend/gen_insn_selection.cpp | 31 ++++++++++++++++--------------
 2 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index f1c58eb..4b0b0cd 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3662,18 +3662,13 @@ namespace gbe
   }
 
   void GenContext::emitMBReadInstruction(const SelectionInstruction &insn) {
-    const GenRegister dst = ra->genReg(insn.dst(0));
+    const GenRegister dst = ra->genReg(insn.dst(1));
     const GenRegister coordx = GenRegister::toUniform(ra->genReg(insn.src(0)),GEN_TYPE_D);
     const GenRegister coordy = GenRegister::toUniform(ra->genReg(insn.src(1)),GEN_TYPE_D);
-    GenRegister header, offsetx, offsety, blocksizereg;
-    if (simdWidth == 8)
-      header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD);
-    else
-      header = GenRegister::retype(GenRegister::Qn(ra->genReg(insn.src(2)),1), GEN_TYPE_UD);
-
-    offsetx = GenRegister::offset(header, 0, 0*4);
-    offsety = GenRegister::offset(header, 0, 1*4);
-    blocksizereg = GenRegister::offset(header, 0, 2*4);
+    const GenRegister header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD);
+    const GenRegister offsetx = GenRegister::offset(header, 0, 0*4);
+    const GenRegister offsety = GenRegister::offset(header, 0, 1*4);
+    const GenRegister blocksizereg = GenRegister::offset(header, 0, 2*4);
     size_t vec_size = insn.extra.elem;
     uint32_t blocksize = 0x1F | (vec_size-1) << 16;
 
@@ -3700,7 +3695,7 @@ namespace gbe
     }
     else if (simdWidth == 16)
     {
-      const GenRegister tmp = ra->genReg(insn.dst(vec_size));
+      const GenRegister tmp = GenRegister::retype(ra->genReg(insn.dst(vec_size + 1)), GEN_TYPE_UD);
       p->push();
         // Copy r0 into the header first
         p->curr.execWidth = 8;
@@ -3718,23 +3713,22 @@ namespace gbe
         // Now read the data
         p->curr.execWidth = 8;
         p->MBREAD(tmp, header, insn.getbti(), vec_size);
+        for (uint32_t i = 0; i < vec_size; i++)
+          p->MOV(ra->genReg(insn.dst(i + 1)), GenRegister::offset(tmp, i));
 
         // Second half
         // Update the header with the coord
         p->curr.execWidth = 1;
         p->ADD(offsetx, offsetx, GenRegister::immud(32));
 
-        const GenRegister tmp2 = GenRegister::offset(tmp, vec_size);
         // Now read the data
         p->curr.execWidth = 8;
-        p->MBREAD(tmp2, header, insn.getbti(), vec_size);
+        p->MBREAD(tmp, header, insn.getbti(), vec_size);
 
         // Move the reg to fit vector rule.
-        for (uint32_t i = 0; i < vec_size; i++) {
-          p->MOV(GenRegister::offset(dst, i * 2), GenRegister::offset(tmp, i));
-          p->MOV(GenRegister::offset(dst, i * 2 + 1),
-                 GenRegister::offset(tmp2, i));
-        }
+        for (uint32_t i = 0; i < vec_size; i++)
+          p->MOV(GenRegister::offset(ra->genReg(insn.dst(i + 1)), 1),
+                 GenRegister::offset(tmp, i));
       p->pop();
     } else NOT_IMPLEMENTED;
   }
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index c4e2934..3b21fb5 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2086,30 +2086,33 @@ namespace gbe
                                  uint32_t vec_size) {
 
     uint32_t simdWidth = curr.execWidth;
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_MBREAD, vec_size * simdWidth / 8, 3);
-    SelectionVector *vector = this->appendVector();
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_MBREAD, vec_size * simdWidth / 8 + 1, 2);
+
+    insn->dst(0) = header;
     for (uint32_t i = 0; i < vec_size; ++i) {
-      insn->dst(i) = dsts[i];
+      insn->dst(i + 1) = dsts[i];
       if(simdWidth == 16)
-        insn->dst(i + vec_size) = tmp[i];
+        insn->dst(i + vec_size + 1) = tmp[i];
     }
     insn->src(0) = coordx;
     insn->src(1) = coordy;
-    insn->src(2) = header;
     insn->setbti(bti);
     insn->extra.elem = vec_size; // vector size
 
-    vector->regNum = vec_size;
-    vector->reg = &insn->dst(0);
-    vector->offsetID = 0;
-    vector->isSrc = 0;
-
+    // Only in simd 8 the data is in vector form
+    if(simdWidth == 8) {
+      SelectionVector *vector = this->appendVector();
+      vector->regNum = vec_size;
+      vector->reg = &insn->dst(1);
+      vector->offsetID = 1;
+      vector->isSrc = 0;
+    }
     if(simdWidth == 16)
     {
       SelectionVector *vectortmp = this->appendVector();
       vectortmp->regNum = vec_size;
-      vectortmp->reg = &insn->dst(vec_size);
-      vectortmp->offsetID = vec_size;
+      vectortmp->reg = &insn->dst(vec_size + 1);
+      vectortmp->offsetID = vec_size + 1;
       vectortmp->isSrc = 0;
     }
   }
@@ -6708,11 +6711,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
       for (uint32_t i = 0; i < vec_size; ++i) {
         valuesVec.push_back(sel.selReg(insn.getDst(i), TYPE_U32));
         if(simdWidth == 16)
-          tmpVec.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32));
+          tmpVec.push_back(GenRegister::retype(GenRegister::f8grf(sel.reg(FAMILY_DWORD)), TYPE_U32));
       }
       const GenRegister coordx = sel.selReg(insn.getSrc(0), TYPE_U32);
       const GenRegister coordy = sel.selReg(insn.getSrc(1), TYPE_U32);
-      const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+      const GenRegister header = GenRegister::retype(GenRegister::f8grf(sel.reg(FAMILY_DWORD)), TYPE_U32);
       GenRegister *tmp = NULL;
       if(simdWidth == 16)
         tmp = &tmpVec[0];
-- 
2.7.4



More information about the Beignet mailing list