[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter-4' - 2 commits - sc/source

Tue Nov 19 20:22:13 PST 2013

sc/source/core/opencl/formulagroupcl.cxx |   99 +++++++++++++++----------------
 1 file changed, 48 insertions(+), 51 deletions(-)

New commits:
commit 8441dd5c9aa58e00fd1d70f68df2d624120007a9
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date:   Tue Nov 19 22:21:24 2013 -0600

    GPU Calc: Fix COUNT() regression
    
    Change-Id: I1b313d7f5f144f1884abe60c816db1fd6a643489

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 0a33eb7..dbefbf8 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -24,8 +24,11 @@
 #include "op_logical.hxx"
 #include "op_statistical.hxx"
 #include "op_array.hxx"
+/// CONFIGURATIONS
 // Comment out this to turn off FMIN and FMAX intrinsics
 #define USE_FMIN_FMAX 1
+#define REDUCE_THRESHOLD 4  // set to 4 for correctness testing. priority 1
+#define UNROLLING_FACTOR 16  // set to 4 for correctness testing (if no reduce)
 #include "formulagroupcl_public.hxx"
 
 #include <list>
@@ -41,7 +44,6 @@
 
 #include <boost/scoped_ptr.hpp>
 
-#define UNROLLING
 
 using namespace formula;
 
@@ -414,6 +416,7 @@ class OpSum; // Forward Declaration
 class OpAverage; // Forward Declaration
 class OpMin; // Forward Declaration
 class OpMax; // Forward Declaration
+class OpCount; // Forward Declaration
 template<class Base>
 class DynamicKernelSlidingArgument: public Base
 {
@@ -450,7 +453,7 @@ public:
         assert(mpDVR);
         size_t nCurWindowSize = mpDVR->GetRefRowSize();
         // original for loop
-#ifndef UNROLLING
+#ifndef UNROLLING_FACTOR
         needBody = true;
         // No need to generate a for-loop for degenerated cases
         if (nCurWindowSize == 1)
@@ -497,7 +500,7 @@ public:
 return nCurWindowSize;
 #endif
 
-#ifdef UNROLLING
+#ifdef UNROLLING_FACTOR
         {
             if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) {
                 ss << "for (int i = ";
@@ -515,7 +518,7 @@ return nCurWindowSize;
                 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
                 ss << "{int i;\n\t";
                 std::stringstream temp1,temp2;
-                int outLoopSize = 16;
+                int outLoopSize = UNROLLING_FACTOR;
                 if ( nCurWindowSize/outLoopSize != 0){
                     ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
                     for(int count=0; count < outLoopSize; count++){
@@ -523,18 +526,10 @@ return nCurWindowSize;
                         if(count==0){
                             temp1 << "if(i + gid0 < " <<mpDVR->GetArrayLength();
                             temp1 << "){\n\t\t";
-                            temp1 << "if (isNan(";
-                            temp1 <<  GenSlidingWindowDeclRef();
-                            temp1 << ")){\n\t\t\t";
-                            temp1 << "tmp = ";
-                            temp1 <<  mpCodeGen->Gen2("tmpBottom", "tmp") << ";\n\t\t";
-                            temp1 << "}else{\n\t\t\t";
-                            temp1 << "tmp = ";
                             temp1 <<  mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
                             temp1 << ";\n\t\t\t";
                             temp1 << "nCount += 1;\n\t\t";
                             temp1 << "}\n\t";
-                            temp1 << "}\n\t";
                         }
                         ss << temp1.str();
                     }
@@ -546,18 +541,11 @@ return nCurWindowSize;
                     if(count==nCurWindowSize/outLoopSize*outLoopSize){
                         temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
                         temp2 << "){\n\t\t";
-                        temp2 << "if (isNan(";
-                        temp2 << GenSlidingWindowDeclRef();
-                        temp2 << ")){\n\t\t\t";
-                        temp2 << "tmp = ";
-                        temp2 << mpCodeGen->Gen2("tmpBottom", "tmp") << ";\n\t\t";
-                        temp2 << "}else{\n\t\t\t";
                         temp2 << "tmp = ";
                         temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
                         temp2 << ";\n\t\t\t";
                         temp2 << "nCount += 1;\n\t\t";
                         temp2 << "}\n\t";
-                        temp2 << "}\n\t";
                     }
                     ss << temp2.str();
                 }
@@ -571,23 +559,16 @@ return nCurWindowSize;
                 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
                 ss << "{int i;\n\t";
                 std::stringstream temp1,temp2;
-                int outLoopSize = 16;
+                int outLoopSize = UNROLLING_FACTOR;
                 if (nCurWindowSize/outLoopSize != 0){
                     ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
                     for(int count=0; count < outLoopSize; count++){
                         ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n\t";
                         if(count==0){
-                            temp1 << "if (isNan(";
-                            temp1 << GenSlidingWindowDeclRef();
-                            temp1 << ")){\n\t\t\t";
-                            temp1 << "tmp = ";
-                            temp1 << mpCodeGen->Gen2("tmpBottom", "tmp") << ";\n\t\t";
-                            temp1 << "}else{\n\t\t\t";
                             temp1 << "tmp = ";
                             temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
                             temp1 << ";\n\t\t\t";
                             temp1 << "nCount += 1;\n\t\t";
-                            temp1 << "}\n\t";
                         }
                         ss << temp1.str();
                     }
@@ -597,17 +578,10 @@ return nCurWindowSize;
                 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count < nCurWindowSize; count++){
                     ss << "i = "<<count<<";\n\t";
                     if(count==nCurWindowSize/outLoopSize*outLoopSize){
-                        temp2 << "if (isNan(";
-                        temp2 << GenSlidingWindowDeclRef();
-                        temp2 << ")){\n\t\t\t";
-                        temp2 << "tmp = ";
-                        temp2 << mpCodeGen->Gen2("tmpBottom", "tmp") << ";\n\t\t";
-                        temp2 << "}else{\n\t\t\t";
                         temp2 << "tmp = ";
                         temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
                         temp2 << ";\n\t\t\t";
                         temp2 << "nCount += 1;\n\t\t";
-                        temp2 << "}\n\t";
                     }
                     ss << temp2.str();
                 }
@@ -694,12 +668,12 @@ public:
         ss << "    for (int l=0; l<loop; l++){\n";
         ss << "    tmp = "<< mpCodeGen->GetBottom() << ";\n";
         ss << "    int loopOffset = l*512;\n";
-        ss << "    if((loopOffset + lidx + offset + 256) < end)\n";
+        ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
         ss << "        tmp = " << mpCodeGen->Gen2(
                 "A[loopOffset + lidx + offset]", "tmp") <<";\n";
         ss << "        tmp = " << mpCodeGen->Gen2(
                 "A[loopOffset + lidx + offset + 256]", "tmp") << ";\n";
-        ss << "    else if ((loopOffset + lidx + offset) < end)\n";
+        ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
         ss << "        tmp = " << mpCodeGen->Gen2(
                 "A[loopOffset + lidx + offset]", "tmp") <<";\n";
         ss << "    shm_buf[lidx] = tmp;\n";
@@ -707,15 +681,21 @@ public:
         ss << "    for (int i = 128; i >0; i/=2) {\n";
         ss << "        if (lidx < i)\n";
         ss << "            shm_buf[lidx] = ";
-        ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]");
-        ss << ";";
+        // Special case count
+        if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+        else
+            ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]")<<";\n";
         ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
         ss << "    }\n";
-        ss << "    if (lidx == 0)\n";
-        ss << "        current_result =";
-        ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
+        ss << "        if (lidx == 0)\n";
+        ss << "            current_result =";
+        if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+            ss << "shm_buf[0]";
+        else
+            ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
         ss << ";\n";
-        ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+        ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
         ss << "    }\n";
         ss << "    if (lidx == 0)\n";
         ss << "        result[writePos] = current_result;\n";
@@ -740,7 +720,11 @@ public:
         size_t nCurWindowSize = mpDVR->GetRefRowSize();
         std::string temp = Base::GetName() + "[gid0]";
         ss << "tmp = ";
-        ss << mpCodeGen->Gen2(temp, "tmp");
+        // Special case count
+        if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+            ss << temp << "+ tmp";
+        else
+            ss << mpCodeGen->Gen2(temp, "tmp");
         ss << ";\n\t";
         needBody = false;
         return nCurWindowSize;
@@ -1030,7 +1014,7 @@ public:
         ss << ") {\n";
         ss << "    double tmp = 0.0;\n";
         ss << "    int gid0 = get_global_id(0);\n";
-#ifndef UNROLLING
+#ifndef UNROLLING_FACTOR
         ss << "    int i ;\n";
         ss << "    for (i = 0; i < "<< nCurWindowSize <<"; i++)\n";
         ss << "    {\n";
@@ -1100,11 +1084,11 @@ public:
         ss << "}";
 #endif
 
-#ifdef UNROLLING
+#ifdef UNROLLING_FACTOR
         ss << "\tint i;\n\t";
         ss << "int currentCount0, currentCount1;\n\t";
         std::stringstream temp3,temp4;
-        int outLoopSize = 16;
+        int outLoopSize = UNROLLING_FACTOR;
         if (nCurWindowSize/outLoopSize != 0){
             ss << "for(int outLoop=0; outLoop<" <<
             nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
@@ -1554,6 +1538,11 @@ DynamicKernelArgument *VectorRefFactory(const std::string &s,
     {
         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
     }
+    // MUL is not supported yet
+    else if (dynamic_cast<OpMul*>(pCodeGen.get()))
+    {
+        return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
+    }
     // Sub is not a reduction per se
     else if (dynamic_cast<OpSub*>(pCodeGen.get()))
     {
@@ -1569,14 +1558,11 @@ DynamicKernelArgument *VectorRefFactory(const std::string &s,
         dynamic_cast< const formula::DoubleVectorRefToken* >(
                 ft->GetFormulaToken());
     // Window being too small to justify a parallel reduction
-    if (pDVR->GetRefRowSize() < 4)
+    if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
     if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
             (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
         return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
-    if ((pDVR->IsStartFixed() && !pDVR->IsEndFixed()) ||
-            (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
-        return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
     else // Other cases are not supported as well
         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
 }
commit c0548c4f6363042432b2052362fd12d2a81532d8
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date:   Tue Nov 19 20:32:48 2013 -0600

    GPU Calc: support reductions without uniform window sizes
    
    Change-Id: Iddd7a1bbc51f02b6b950c34afd9cbe95ec09bbf9

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index d283174..0a33eb7 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -675,23 +675,31 @@ public:
         ss << "    int writePos = get_group_id(1);\n";
         ss << "    int lidx = get_local_id(0);\n";
         ss << "    __local double shm_buf[256];\n";
-        if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+        if (mpDVR->IsStartFixed())
             ss << "    int offset = 0;\n";
-        else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+        else // if (!mpDVR->IsStartFixed())
             ss << "    int offset = get_group_id(1);\n";
-        else
-            throw Unhandled();
+        if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+            ss << "    int end = windowSize;\n";
+        else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+            ss << "    int end = offset + windowSize;\n";
+        else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+            ss << "    int end = windowSize + get_group_id(1);\n";
+        else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+            ss << "    int end = windowSize;\n";
+        ss << "    end = min(end, arrayLength);\n";
+
         ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
         ss << "    int loop = arrayLength/512 + 1;\n";
         ss << "    for (int l=0; l<loop; l++){\n";
         ss << "    tmp = "<< mpCodeGen->GetBottom() << ";\n";
         ss << "    int loopOffset = l*512;\n";
-        ss << "    if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
+        ss << "    if((loopOffset + lidx + offset + 256) < end)\n";
         ss << "        tmp = " << mpCodeGen->Gen2(
                 "A[loopOffset + lidx + offset]", "tmp") <<";\n";
         ss << "        tmp = " << mpCodeGen->Gen2(
                 "A[loopOffset + lidx + offset + 256]", "tmp") << ";\n";
-        ss << "    else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
+        ss << "    else if ((loopOffset + lidx + offset) < end)\n";
         ss << "        tmp = " << mpCodeGen->Gen2(
                 "A[loopOffset + lidx + offset]", "tmp") <<";\n";
         ss << "    shm_buf[lidx] = tmp;\n";
@@ -1561,11 +1569,14 @@ DynamicKernelArgument *VectorRefFactory(const std::string &s,
         dynamic_cast< const formula::DoubleVectorRefToken* >(
                 ft->GetFormulaToken());
     // Window being too small to justify a parallel reduction
-    if (pDVR->GetRefRowSize() < 100)
+    if (pDVR->GetRefRowSize() < 4)
         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
     if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
             (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
         return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
+    if ((pDVR->IsStartFixed() && !pDVR->IsEndFixed()) ||
+            (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
+        return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
     else // Other cases are not supported as well
         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
 }