[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter-4' - 3 commits - sc/qa sc/source

Mon Nov 18 14:04:44 PST 2013

 sc/qa/unit/data/ods/opencl/compiler/horizontal.ods |binary
 sc/qa/unit/opencl-test.cxx                         |   32 +++++++
 sc/source/core/opencl/formulagroupcl.cxx           |   88 ++++++++++-----------
 sc/source/core/opencl/opbase.cxx                   |   12 ++
 sc/source/core/opencl/opbase.hxx                   |    7 -
 5 files changed, 90 insertions(+), 49 deletions(-)

New commits:
commit 41bbf04a5a9cdfc1984e436626222cd806a48a3d
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date:   Mon Nov 18 15:05:25 2013 -0600

    GPU Calc: turn on parallel sumifs and parallel sum reduce
    
    Change-Id: Id615ea0f5f16a4dfc517aacb30715c2df84553e3

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 224ed49..17f4afb 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -426,9 +426,10 @@ public:
     }
     virtual bool NeedParallelReduction(void) const
     {
-        if (dynamic_cast<OpSum*>(mpCodeGen.get())
-            && !dynamic_cast<OpAverage*>(mpCodeGen.get()))
-            return GetWindowSize()> 100 &&
+        if ((dynamic_cast<OpSum*>(mpCodeGen.get())
+            && !dynamic_cast<OpAverage*>(mpCodeGen.get())) ||
+            dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
+            return GetWindowSize()> 4 &&
                 ( (GetStartFixed() && GetEndFixed()) ||
                   (!GetStartFixed() && !GetEndFixed())  ) ;
         else
@@ -457,10 +458,10 @@ public:
             ss << "    tmp = 0.0;\n";
             ss << "    int loopOffset = l*512;\n";
             ss << "    if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
-            ss << "        tmp = A[loopOffset + lidx + offset] + "
-                "A[loopOffset + lidx + offset + 256];\n";
+            ss << "        tmp = fsum(A[loopOffset + lidx + offset], 0) + "
+                "fsum(A[loopOffset + lidx + offset + 256], 0);\n";
             ss << "    else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
-            ss << "        tmp = A[loopOffset + lidx + offset];\n";
+            ss << "        tmp = fsum(A[loopOffset + lidx + offset], 0);\n";
             ss << "    shm_buf[lidx] = tmp;\n";
             ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
             ss << "    for (int i = 128; i >0; i/=2) {\n";
@@ -496,7 +497,8 @@ public:
         size_t nCurWindowSize = mpDVR->GetRefRowSize();
         if (dynamic_cast<OpSum*>(mpCodeGen.get()))
         {
-            if (!bIsStartFixed && !bIsEndFixed)
+            if ((!bIsStartFixed && !bIsEndFixed) ||
+                (bIsStartFixed && bIsEndFixed))
             {
                 // set 100 as a temporary threshold for invoking reduction
                 // kernel in NeedParalleLReduction function
@@ -510,21 +512,6 @@ public:
                     return nCurWindowSize;
                 }
             }
-
-            if (bIsStartFixed && bIsEndFixed)
-            {
-                // set 100 as a temporary threshold for invoking reduction
-                // kernel in NeedParalleLReduction function
-                if (NeedParallelReduction())
-                {
-                    std::string temp = Base::GetName() + "[0]";
-                    ss << "tmp = ";
-                    ss << mpCodeGen->Gen2(temp, "tmp");
-                    ss << ";\n\t";
-                    needBody = false;
-                    return nCurWindowSize;
-                }
-            }
         }
         needBody = true;
 
@@ -576,7 +563,8 @@ public:
 
     virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
     {
-        if (!NeedParallelReduction())
+        if (!NeedParallelReduction() ||
+            dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
             return Base::Marshal(k, argno, w, mpProgram);
 
         assert(Base::mpClmem == NULL);
commit c277e98f9ce1b57794311ef45aca734b8d4dee85
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date:   Mon Nov 18 13:39:02 2013 -0600

    GPU Calc: testcases for horizontal ranges
    
    AMLOEXT-242 BUG
    
    Change-Id: I4b87bdf6183ed81ad767550f5cd49aab51531cf2

diff --git a/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods b/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods
new file mode 100644
index 0000000..18edf64
Binary files /dev/null and b/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods differ
diff --git a/sc/qa/unit/opencl-test.cxx b/sc/qa/unit/opencl-test.cxx
index b2d4b87..86a73b0 100644
--- a/sc/qa/unit/opencl-test.cxx
+++ b/sc/qa/unit/opencl-test.cxx
@@ -83,6 +83,7 @@ public:
     void testFinacialRateFormula();
     void testFinancialAccrintmFormula();
     void testFinancialAccrintFormula();
+    void testCompilerHorizontal();
     void testCompilerNested();
     void testFinacialSLNFormula();
     void testStatisticalFormulaGammaLn();
@@ -253,6 +254,7 @@ public:
     CPPUNIT_TEST(testFinacialIRRFormula);
     CPPUNIT_TEST(testFinacialMIRRFormula);
     CPPUNIT_TEST(testFinacialRateFormula);
+    CPPUNIT_TEST(testCompilerHorizontal);
     CPPUNIT_TEST(testCompilerNested);
     CPPUNIT_TEST(testFinacialSLNFormula);
     CPPUNIT_TEST(testFinancialAccrintmFormula);
@@ -445,6 +447,36 @@ void ScOpenclTest::enableOpenCL()
     sc::FormulaGroupInterpreter::enableOpenCL(true);
 }
 
+void ScOpenclTest::testCompilerHorizontal()
+{
+    if (!detectOpenCLDevice())
+        return;
+
+    ScDocShellRef xDocSh = loadDoc("opencl/compiler/horizontal.", ODS);
+    ScDocument* pDoc = xDocSh->GetDocument();
+    CPPUNIT_ASSERT(pDoc);
+    enableOpenCL();
+    pDoc->CalcAll();
+
+    ScDocShellRef xDocShRes = loadDoc("opencl/compiler/horizontal.", ODS);
+    ScDocument* pDocRes = xDocShRes->GetDocument();
+    CPPUNIT_ASSERT(pDocRes);
+    // Check the results of formula cells in the shared formula range.
+    for (SCROW i = 1; i < 5; ++i)
+    {
+        double fLibre = pDoc->GetValue(ScAddress(12, i, 0));
+        double fExcel = pDocRes->GetValue(ScAddress(12, i, 0));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel));
+        fLibre = pDoc->GetValue(ScAddress(13, i, 0));
+        fExcel = pDocRes->GetValue(ScAddress(13, i, 0));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel));
+        fLibre = pDoc->GetValue(ScAddress(14, i, 0));
+        fExcel = pDocRes->GetValue(ScAddress(14, i, 0));
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel));
+    }
+    xDocSh->DoClose();
+    xDocShRes->DoClose();
+}
 void ScOpenclTest::testCompilerNested()
 {
     if (!detectOpenCLDevice())
commit f9b2e576407d139d0c67d54f03c05dca56dee4e0
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date:   Mon Nov 18 14:46:30 2013 -0600

    GPU Calc: implement horizontal ranges as multiple VectorRefs
    
    AMLOEXT-242 Fix
    
    Change-Id: Ia3deb221528230554b7c431e926b10428441666a

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index f6e6baf..224ed49 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -65,9 +65,10 @@ size_t VectorRef::Marshal(cl_kernel k, int argno, int, cl_program)
         const formula::DoubleVectorRefToken* pDVR =
             dynamic_cast< const formula::DoubleVectorRefToken* >(ref);
         assert(pDVR);
-        if (pDVR->GetArrays()[0].mpNumericArray == NULL)
+        if (pDVR->GetArrays()[mnIndex].mpNumericArray == NULL)
             throw Unhandled();
-        pHostBuffer = const_cast<double*>(pDVR->GetArrays()[0].mpNumericArray);
+        pHostBuffer = const_cast<double*>(
+                pDVR->GetArrays()[mnIndex].mpNumericArray);
         szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
     } else {
         throw Unhandled();
@@ -281,8 +282,8 @@ class DynamicKernelStringArgument: public VectorRef
 {
 public:
     DynamicKernelStringArgument(const std::string &s,
-        FormulaTreeNodeRef ft):
-        VectorRef(s, ft) {}
+        FormulaTreeNodeRef ft, int index = 0):
+        VectorRef(s, ft, index) {}
 
     virtual void GenSlidingWindowFunction(std::stringstream &) {}
     /// Generate declaration
@@ -319,7 +320,7 @@ size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int, cl_prog
             dynamic_cast< const formula::DoubleVectorRefToken* >(ref);
         assert(pDVR);
         nStrings = pDVR->GetArrayLength();
-        vRef = pDVR->GetArrays()[0];
+        vRef = pDVR->GetArrays()[mnIndex];
     }
     size_t szHostBuffer = nStrings * sizeof(cl_int);
     // Marshal strings. Right now we pass hashes of these string
@@ -411,8 +412,9 @@ class DynamicKernelSlidingArgument: public Base
 {
 public:
     DynamicKernelSlidingArgument(const std::string &s,
-        FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen):
-        Base(s, ft), mpCodeGen(CodeGen), mpClmem2(NULL)
+        FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
+        int index=0):
+        Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
     {
         FormulaToken *t = ft->GetFormulaToken();
         if (t->GetType() != formula::svDoubleVectorRef)
@@ -491,9 +493,6 @@ public:
         std::stringstream &ss, bool &needBody)
     {
         assert(mpDVR);
-        // Do not handle horizontal double vectors yet
-        if (mpDVR->GetArrays().size() > 1)
-            throw Unhandled();
         size_t nCurWindowSize = mpDVR->GetRefRowSize();
         if (dynamic_cast<OpSum*>(mpCodeGen.get()))
         {
@@ -528,6 +527,15 @@ public:
             }
         }
         needBody = true;
+
+        // No need to generate a for-loop for degenerated cases
+        if (nCurWindowSize == 1)
+        {
+            ss << "if (gid0 <" << mpDVR->GetArrayLength();
+            ss << ")\n\t{\tint i = 0;\n\t\t";
+            return nCurWindowSize;
+        }
+
         ss << "for (int i = ";
         if (!bIsStartFixed && bIsEndFixed)
         {
@@ -579,10 +587,10 @@ public:
         size_t nInput = mpDVR->GetArrayLength();
         size_t nCurWindowSize = mpDVR->GetRefRowSize();
         // create clmem buffer
-        if (mpDVR->GetArrays()[0].mpNumericArray == NULL)
+        if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
             throw Unhandled();
         double *pHostBuffer = const_cast<double*>(
-                mpDVR->GetArrays()[0].mpNumericArray);
+                mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
         size_t szHostBuffer = nInput * sizeof(double);
         Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
                 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
@@ -1269,15 +1277,19 @@ DynamicKernelSoPArguments::DynamicKernelSoPArguments(
                     const formula::DoubleVectorRefToken* pDVR =
                         dynamic_cast< const formula::DoubleVectorRefToken* >(pChild);
                     assert(pDVR);
-                    if (pDVR->GetArrays()[0].mpNumericArray)
-                        mvSubArguments.push_back(
-                                SubArgument(new DynamicKernelSlidingArgument
-                                    <VectorRef>(ts, ft->Children[i], mpCodeGen)));
-                    else
-                        mvSubArguments.push_back(
-                                SubArgument(new DynamicKernelSlidingArgument
-                                    <DynamicKernelStringArgument>(
-                                        ts, ft->Children[i], mpCodeGen)));
+                    for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
+                    {
+                        if (pDVR->GetArrays()[j].mpNumericArray)
+                            mvSubArguments.push_back(
+                                    SubArgument(new DynamicKernelSlidingArgument
+                                        <VectorRef>(
+                                            ts, ft->Children[i], mpCodeGen, j)));
+                        else
+                            mvSubArguments.push_back(
+                                    SubArgument(new DynamicKernelSlidingArgument
+                                        <DynamicKernelStringArgument>(
+                                            ts, ft->Children[i], mpCodeGen, j)));
+                    }
                 } else if (pChild->GetType() == formula::svSingleVectorRef) {
                     const formula::SingleVectorRefToken* pSVR =
                         dynamic_cast< const formula::SingleVectorRefToken* >(pChild);
diff --git a/sc/source/core/opencl/opbase.cxx b/sc/source/core/opencl/opbase.cxx
index 41e5528..6bb866c 100644
--- a/sc/source/core/opencl/opbase.cxx
+++ b/sc/source/core/opencl/opbase.cxx
@@ -30,8 +30,16 @@ FormulaToken* DynamicKernelArgument::GetFormulaToken(void) const
     return mFormulaTree->GetFormulaToken();
 }
 
-VectorRef::VectorRef(const std::string &s, FormulaTreeNodeRef ft):
-    DynamicKernelArgument(s, ft), mpClmem(NULL) {}
+VectorRef::VectorRef(const std::string &s, FormulaTreeNodeRef ft, int idx):
+    DynamicKernelArgument(s, ft), mpClmem(NULL), mnIndex(idx)
+{
+    if (mnIndex)
+    {
+        std::stringstream ss;
+        ss << mSymName << "s" << mnIndex;
+        mSymName = ss.str();
+    }
+}
 
 VectorRef::~VectorRef()
 {
diff --git a/sc/source/core/opencl/opbase.hxx b/sc/source/core/opencl/opbase.hxx
index 11b66df..d921119 100644
--- a/sc/source/core/opencl/opbase.hxx
+++ b/sc/source/core/opencl/opbase.hxx
@@ -103,7 +103,7 @@ public:
     virtual bool NeedParallelReduction(void) const { return false; }
 
 protected:
-    const std::string mSymName;
+    std::string mSymName;
     FormulaTreeNodeRef mFormulaTree;
 };
 
@@ -115,12 +115,11 @@ protected:
 class VectorRef : public DynamicKernelArgument
 {
 public:
-    VectorRef(const std::string &s, FormulaTreeNodeRef ft);
+    VectorRef(const std::string &s, FormulaTreeNodeRef ft, int index = 0);
 
     const std::string &GetNameAsString(void) const { return mSymName; }
     /// Generate declaration
     virtual void GenDecl(std::stringstream &ss) const;
-
     /// When declared as input to a sliding window function
     virtual void GenSlidingWindowDecl(std::stringstream &ss) const;
 
@@ -146,6 +145,8 @@ public:
 protected:
     // Used by marshaling
     cl_mem mpClmem;
+    // index in multiple double vector refs that have multiple ranges
+    const int mnIndex;
 };
 /// Abstract class for code generation