[Beignet] [PATCH] backend: add denorm support to double operations

Wed Apr 19 04:58:13 UTC 2017

Set the Double Precision Denorm Mode bit in the control register to enable denormal
	support for doubles when a kernel uses a double operation. This is supported from IVB onward, so it is set in GenContext.

Signed-off-by: rander.wang <rander.wang at intel.com>
---
 backend/src/backend/gen_context.cpp   | 15 +++++++++++++++
 backend/src/backend/gen_context.hpp   |  1 +
 backend/src/ir/unit.cpp               |  1 +
 backend/src/ir/unit.hpp               |  3 +++
 backend/src/llvm/llvm_gen_backend.cpp |  3 ++-
 5 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index bb104cf..3ef8a4d 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2595,6 +2595,20 @@ namespace gbe
     return;
   }
 
+  void GenContext::DenormEnabledFP64(void) // Emits an OR that sets the FP64 denorm flag in control register 0 when the unit uses doubles
+  {
+       if(unit.getFP64Enabled()) // nothing is emitted unless the IR unit was flagged as FP64-enabled
+       {
+            int denormFlags = 0x00000040; // mask OR-ed into the control register: the Double Precision Denorm Mode bit (per the commit description)
+            p->push(); // paired with pop() below, bracketing the p->curr overrides
+            p->curr.noMask = 1;
+            p->curr.predicate = GEN_PREDICATE_NONE; // the OR is emitted without predication
+            p->curr.execWidth = 1; // a single-lane instruction
+            p->OR(ra->genReg(GenRegister::ud1arf(GEN_ARF_CONTROL, 0)), (GenRegister::ud1arf(GEN_ARF_CONTROL, 0)), GenRegister::immw(denormFlags)); // control reg 0 = control reg 0 | denormFlags; NOTE(review): the mask is passed as a word immediate against a ud register - confirm the encoding is valid
+            p->pop();
+       }
+  }
+
   void GenContext::subTimestamps(GenRegister& t0, GenRegister& t1, GenRegister& tmp)
   {
     p->push(); {
@@ -3655,6 +3669,7 @@ namespace gbe
     this->emitStackPointer();
     this->clearFlagRegister();
     this->emitSLMOffset();
+    this->DenormEnabledFP64();
     this->emitInstructionStream();
     if (this->patchBranches() == false)
       return false;
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 7fd40d1..269b16a 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -241,6 +241,7 @@ namespace gbe
 
     virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0);
     void calcGlobalXYZRange(GenRegister& reg, GenRegister& tmp, int flag, int subFlag);
+    void DenormEnabledFP64(void);
     virtual void subTimestamps(GenRegister& t0, GenRegister& t1, GenRegister& tmp);
     virtual void addTimestamps(GenRegister& t0, GenRegister& t1, GenRegister& tmp);
     virtual void emitPrintfLongInstruction(GenRegister& addr, GenRegister& data, GenRegister& src, uint32_t bti, bool useSends);
diff --git a/backend/src/ir/unit.cpp b/backend/src/ir/unit.cpp
index 79e129d..b82c0d8 100644
--- a/backend/src/ir/unit.cpp
+++ b/backend/src/ir/unit.cpp
@@ -31,6 +31,7 @@ namespace ir {
     profilingInfo = GBE_NEW(ProfilingInfo);
     inProfilingMode = false;
     oclVersion = 120;
+    isFP64Enabled = false;
   }
   Unit::~Unit(void) {
     for (const auto &pair : functions) GBE_DELETE(pair.second);
diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
index 46d7be7..7596da1 100644
--- a/backend/src/ir/unit.hpp
+++ b/backend/src/ir/unit.hpp
@@ -92,6 +92,8 @@ namespace ir {
     bool getValid() { return valid; }
     void setOclVersion(uint32_t version) { oclVersion = version; }
     uint32_t getOclVersion() const { return oclVersion; }
+    void setFP64Enabled() { isFP64Enabled = true;} // Flags the unit as FP64-enabled; one-way, it only ever sets the flag to true
+    bool getFP64Enabled() const {return isFP64Enabled;} // Returns the current value of the isFP64Enabled flag
   private:
     friend class ContextInterface; //!< Can free modify the unit
     FunctionSet functions; //!< All the defined functions
@@ -103,6 +105,7 @@ namespace ir {
     uint32_t oclVersion;
     bool valid;
     bool inProfilingMode;
+    bool isFP64Enabled;
   };
 
   /*! Output the unit string in the given stream */
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 18ae3d5..95de20b 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1916,10 +1916,11 @@ namespace gbe
     auto type = value->getType();
     auto typeID = type->getTypeID();
     switch (typeID) {
+      case Type::DoubleTyID:
+	unit.setFP64Enabled();
       case Type::IntegerTyID:
       case Type::FloatTyID:
       case Type::HalfTyID:
-      case Type::DoubleTyID:
       case Type::PointerTyID:
         regTranslator.newScalar(value, key, 0, uniform);
         break;
-- 
2.7.4