[Beignet] [PATCH v3] GBE: Avoid unnecessary dag/liveness computing at backend.

Zhigang Gong zhigang.gong at intel.com
Sun Apr 27 22:25:47 PDT 2014


We don't need to recompute the DAG/liveness at the backend each time
we switch to a new code gen strategy.
For the unit tests, this patch can save 15% of the
overall execution time. For LuxMark in STRICT conformance
mode, it saves about 40% of the build time.

v3: fix some minor bugs.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/context.cpp            | 28 ++++++++++++++++++++--------
 backend/src/backend/context.hpp            |  2 ++
 backend/src/backend/gen_context.cpp        | 26 +++++++++++++++++++-------
 backend/src/backend/gen_context.hpp        |  5 +++--
 backend/src/backend/gen_insn_selection.cpp | 10 ++++++++--
 backend/src/backend/gen_program.cpp        | 20 ++++++++++++++++----
 backend/src/backend/gen_reg_allocation.cpp |  5 +++--
 7 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index dc27d83..6a0bca2 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -335,12 +335,8 @@ namespace gbe
     this->liveness = GBE_NEW(ir::Liveness, const_cast<ir::Function&>(fn));
     this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness);
     // r0 (GEN_REG_SIZE) is always set by the HW and used at the end by EOT
-    this->registerAllocator = GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE);
-    this->scratchAllocator = GBE_NEW(ScratchAllocator, 12*KB);
-    if (fn.getSimdWidth() == 0 || OCL_SIMD_WIDTH != 15)
-      this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
-    else
-      this->simdWidth = fn.getSimdWidth();
+    this->registerAllocator = NULL; //GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE);
+    this->scratchAllocator = NULL; //GBE_NEW(ScratchAllocator, 12*KB);
   }
 
   Context::~Context(void) {
@@ -350,12 +346,28 @@ namespace gbe
     GBE_SAFE_DELETE(this->liveness);
   }
 
+  void Context::startNewCG(uint32_t simdWidth) {
+    if (simdWidth == 0 || OCL_SIMD_WIDTH != 15)
+      this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
+    else
+      this->simdWidth = simdWidth;
+    GBE_SAFE_DELETE(this->registerAllocator);
+    GBE_SAFE_DELETE(this->scratchAllocator);
+    GBE_ASSERT(dag != NULL && liveness != NULL);
+    this->registerAllocator = GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE);
+    this->scratchAllocator = GBE_NEW(ScratchAllocator, 12*KB);
+    this->curbeRegs.clear();
+    this->JIPs.clear();
+  }
+
   Kernel *Context::compileKernel(void) {
     this->kernel = this->allocateKernel();
     this->kernel->simdWidth = this->simdWidth;
     this->buildArgList();
-    this->buildUsedLabels();
-    this->buildJIPs();
+    if (usedLabels.size() == 0)
+      this->buildUsedLabels();
+    if (JIPs.size() == 0)
+      this->buildJIPs();
     this->buildStack();
     this->handleSLM();
     if (this->emitCode() == false) {
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 26167a0..d4dcfca 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -56,6 +56,8 @@ namespace gbe
     Context(const ir::Unit &unit, const std::string &name);
     /*! Release everything needed */
     virtual ~Context(void);
+    /*! start new code generation with specific simd width. */
+    void startNewCG(uint32_t simdWidth);
     /*! Compile the code */
     Kernel *compileKernel(void);
     /*! Tells if the labels is used */
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 34e3e61..4da47f8 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -46,15 +46,12 @@ namespace gbe
   GenContext::GenContext(const ir::Unit &unit,
                          const std::string &name,
                          uint32_t deviceID,
-                         uint32_t reservedSpillRegs,
-                         bool limitRegisterPressure,
                          bool relaxMath) :
-    Context(unit, name), deviceID(deviceID), reservedSpillRegs(reservedSpillRegs),
-    limitRegisterPressure(limitRegisterPressure), relaxMath(relaxMath)
+    Context(unit, name), deviceID(deviceID), relaxMath(relaxMath)
   {
-    this->p = GBE_NEW(GenEncoder, simdWidth, 7, deviceID); // XXX handle more than Gen7
-    this->sel = GBE_NEW(Selection, *this);
-    this->ra = GBE_NEW(GenRegAllocator, *this);
+    this->p = NULL;
+    this->sel = NULL;
+    this->ra = NULL;
   }
 
   GenContext::~GenContext(void) {
@@ -63,6 +60,21 @@ namespace gbe
     GBE_DELETE(this->p);
   }
 
+  void GenContext::startNewCG(uint32_t simdWidth, uint32_t reservedSpillRegs, bool limitRegisterPressure) {
+    this->limitRegisterPressure = limitRegisterPressure;
+    this->reservedSpillRegs = reservedSpillRegs;
+    Context::startNewCG(simdWidth);
+    GBE_SAFE_DELETE(ra);
+    GBE_SAFE_DELETE(sel);
+    GBE_SAFE_DELETE(p);
+    this->p = GBE_NEW(GenEncoder, this->simdWidth, 7, deviceID); // XXX handle more than Gen7
+    this->sel = GBE_NEW(Selection, *this);
+    this->ra = GBE_NEW(GenRegAllocator, *this);
+    this->branchPos2.clear();
+    this->branchPos3.clear();
+    this->labelPos.clear();
+  }
+
   void GenContext::emitInstructionStream(void) {
     // Emit Gen ISA
     for (auto &block : *sel->blockList)
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 12434f5..14ea719 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -52,10 +52,11 @@ namespace gbe
     /*! Create a new context. name is the name of the function we want to
      *  compile
      */
-    GenContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, uint32_t reservedSpillRegs = 0,
-               bool limitRegisterPressure = false, bool relaxMath = false);
+    GenContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, bool relaxMath = false);
     /*! Release everything needed */
     ~GenContext(void);
+    /*! Start new code generation with specific parameters */
+    void startNewCG(uint32_t simdWidth, uint32_t reservedSpillRegs, bool limitRegisterPressure);
     /*! Target device ID*/
     uint32_t deviceID;
     /*! Implements base class */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 32086d3..c05a97b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -740,7 +740,10 @@ namespace gbe
         }
 
         if (poolOffset > ctx.reservedSpillRegs) {
-          std::cerr << "Instruction (#" << (uint32_t)insn.opcode << ") src too large pooloffset " << (uint32_t)poolOffset << std::endl;
+          if (GBE_DEBUG)
+            std::cerr << "Instruction (#" << (uint32_t)insn.opcode
+                      << ") src too large pooloffset "
+                      << (uint32_t)poolOffset << std::endl;
           return false;
         }
         while(!regSet.empty()) {
@@ -798,7 +801,10 @@ namespace gbe
         }
 
         if (poolOffset > ctx.reservedSpillRegs){
-          std::cerr << "Instruction (#" << (uint32_t)insn.opcode << ") dst too large pooloffset " << (uint32_t)poolOffset << std::endl;
+          if (GBE_DEBUG)
+            std::cerr << "Instruction (#" << (uint32_t)insn.opcode
+                      << ") dst too large pooloffset "
+                      << (uint32_t)poolOffset << std::endl;
           return false;
         }
         while(!regSet.empty()) {
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 4dca79d..3a4421a 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -80,9 +80,12 @@ namespace gbe {
     bool limitRegisterPressure;
   } codeGenStrategy[] = {
     {16, 0, false},
+    {16, 5, false},
     {16, 10, false},
     {8, 0, false},
+    {8, 4, false},
     {8, 8, false},
+    {8, 16, false},
   };
 
   Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name, bool relaxMath) {
@@ -91,11 +94,20 @@ namespace gbe {
     // when the function already provides the simd width we need to use (i.e.
     // non zero)
     const ir::Function *fn = unit.getFunction(name);
-    const uint32_t codeGenNum = fn->getSimdWidth() != 0 ? 2 : 4;
-    uint32_t codeGen = fn->getSimdWidth() == 8 ? 2 : 0;
+    uint32_t codeGenNum = sizeof(codeGenStrategy) / sizeof(codeGenStrategy[0]);
+    uint32_t codeGen = 0;
+    if (fn->getSimdWidth() == 8) {
+      codeGen = 3;
+    } else if (fn->getSimdWidth() == 16) {
+      codeGenNum = 3;
+    } else if (fn->getSimdWidth() == 0) {
+      codeGen = 0;
+    } else
+      GBE_ASSERT(0);
     Kernel *kernel = NULL;
 
     // Stop when compilation is successful
+    GenContext *ctx = GBE_NEW(GenContext, unit, name, deviceID, relaxMath);
     for (; codeGen < codeGenNum; ++codeGen) {
       const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth;
       const bool limitRegisterPressure = codeGenStrategy[codeGen].limitRegisterPressure;
@@ -103,14 +115,14 @@ namespace gbe {
 
       // Force the SIMD width now and try to compile
       unit.getFunction(name)->setSimdWidth(simdWidth);
-      Context *ctx = GBE_NEW(GenContext, unit, name, deviceID, reservedSpillRegs, limitRegisterPressure, relaxMath);
+      ctx->startNewCG(simdWidth, reservedSpillRegs, limitRegisterPressure);
       kernel = ctx->compileKernel();
       if (kernel != NULL) {
         break;
       }
-      GBE_DELETE(ctx);
       fn->getImageSet()->clearInfo();
     }
+    //GBE_DELETE(ctx);
 
     GBE_ASSERTM(kernel != NULL, "Fail to compile kernel, may need to increase reserved registers for spilling.");
     return kernel;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 32cd643..54b7cac 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -627,7 +627,6 @@ namespace gbe
         const uint32_t grfOffset = allocateReg(interval, size, alignment);
         if(grfOffset == 0) {
           GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD));
-          GBE_ASSERT(ctx.reservedSpillRegs == 0 || vector->regNum < ctx.reservedSpillRegs);
           for(int i = vector->regNum-1; i >= 0; i--) {
             if (!spillReg(vector->reg[i].reg()))
               return false;
@@ -662,7 +661,8 @@ namespace gbe
       allocateScratchForSpilled();
       bool success = selection.spillRegs(spilledRegs, reservedReg);
       if (!success) {
-        std::cerr << "Fail to spill registers." << std::endl;
+        if (GBE_DEBUG)
+          std::cerr << "Fail to spill registers." << std::endl;
         return false;
       }
     }
@@ -775,6 +775,7 @@ namespace gbe
       // from the RA map.
       bool success = expireReg(interval.reg);
       GBE_ASSERT(success);
+      success = success;
       RA.erase(interval.reg);
     }
     spilledRegs.insert(std::make_pair(interval.reg, spillTag));
-- 
1.8.3.2



More information about the Beignet mailing list