[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter-4' - 3 commits - sc/qa sc/source
I-Jui Sung (Ray)
ray at multicorewareinc.com
Mon Nov 18 14:04:44 PST 2013
sc/qa/unit/data/ods/opencl/compiler/horizontal.ods |binary
sc/qa/unit/opencl-test.cxx | 32 +++++++
sc/source/core/opencl/formulagroupcl.cxx | 88 ++++++++++-----------
sc/source/core/opencl/opbase.cxx | 12 ++
sc/source/core/opencl/opbase.hxx | 7 -
5 files changed, 90 insertions(+), 49 deletions(-)
New commits:
commit 41bbf04a5a9cdfc1984e436626222cd806a48a3d
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date: Mon Nov 18 15:05:25 2013 -0600
GPU Calc: turn on parallel sumifs and parallel sum reduce
Change-Id: Id615ea0f5f16a4dfc517aacb30715c2df84553e3
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 224ed49..17f4afb 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -426,9 +426,10 @@ public:
}
virtual bool NeedParallelReduction(void) const
{
- if (dynamic_cast<OpSum*>(mpCodeGen.get())
- && !dynamic_cast<OpAverage*>(mpCodeGen.get()))
- return GetWindowSize()> 100 &&
+ if ((dynamic_cast<OpSum*>(mpCodeGen.get())
+ && !dynamic_cast<OpAverage*>(mpCodeGen.get())) ||
+ dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
+ return GetWindowSize()> 4 &&
( (GetStartFixed() && GetEndFixed()) ||
(!GetStartFixed() && !GetEndFixed()) ) ;
else
@@ -457,10 +458,10 @@ public:
ss << " tmp = 0.0;\n";
ss << " int loopOffset = l*512;\n";
ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
- ss << " tmp = A[loopOffset + lidx + offset] + "
- "A[loopOffset + lidx + offset + 256];\n";
+ ss << " tmp = fsum(A[loopOffset + lidx + offset], 0) + "
+ "fsum(A[loopOffset + lidx + offset + 256], 0);\n";
ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
- ss << " tmp = A[loopOffset + lidx + offset];\n";
+ ss << " tmp = fsum(A[loopOffset + lidx + offset], 0);\n";
ss << " shm_buf[lidx] = tmp;\n";
ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
ss << " for (int i = 128; i >0; i/=2) {\n";
@@ -496,7 +497,8 @@ public:
size_t nCurWindowSize = mpDVR->GetRefRowSize();
if (dynamic_cast<OpSum*>(mpCodeGen.get()))
{
- if (!bIsStartFixed && !bIsEndFixed)
+ if ((!bIsStartFixed && !bIsEndFixed) ||
+ (bIsStartFixed && bIsEndFixed))
{
// set 100 as a temporary threshold for invoking reduction
// kernel in NeedParalleLReduction function
@@ -510,21 +512,6 @@ public:
return nCurWindowSize;
}
}
-
- if (bIsStartFixed && bIsEndFixed)
- {
- // set 100 as a temporary threshold for invoking reduction
- // kernel in NeedParalleLReduction function
- if (NeedParallelReduction())
- {
- std::string temp = Base::GetName() + "[0]";
- ss << "tmp = ";
- ss << mpCodeGen->Gen2(temp, "tmp");
- ss << ";\n\t";
- needBody = false;
- return nCurWindowSize;
- }
- }
}
needBody = true;
@@ -576,7 +563,8 @@ public:
virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
{
- if (!NeedParallelReduction())
+ if (!NeedParallelReduction() ||
+ dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
return Base::Marshal(k, argno, w, mpProgram);
assert(Base::mpClmem == NULL);
commit c277e98f9ce1b57794311ef45aca734b8d4dee85
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date: Mon Nov 18 13:39:02 2013 -0600
GPU Calc: testcases for horizontal ranges
AMLOEXT-242 BUG
Change-Id: I4b87bdf6183ed81ad767550f5cd49aab51531cf2
diff --git a/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods b/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods
new file mode 100644
index 0000000..18edf64
Binary files /dev/null and b/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods differ
diff --git a/sc/qa/unit/opencl-test.cxx b/sc/qa/unit/opencl-test.cxx
index b2d4b87..86a73b0 100644
--- a/sc/qa/unit/opencl-test.cxx
+++ b/sc/qa/unit/opencl-test.cxx
@@ -83,6 +83,7 @@ public:
void testFinacialRateFormula();
void testFinancialAccrintmFormula();
void testFinancialAccrintFormula();
+ void testCompilerHorizontal();
void testCompilerNested();
void testFinacialSLNFormula();
void testStatisticalFormulaGammaLn();
@@ -253,6 +254,7 @@ public:
CPPUNIT_TEST(testFinacialIRRFormula);
CPPUNIT_TEST(testFinacialMIRRFormula);
CPPUNIT_TEST(testFinacialRateFormula);
+ CPPUNIT_TEST(testCompilerHorizontal);
CPPUNIT_TEST(testCompilerNested);
CPPUNIT_TEST(testFinacialSLNFormula);
CPPUNIT_TEST(testFinancialAccrintmFormula);
@@ -445,6 +447,36 @@ void ScOpenclTest::enableOpenCL()
sc::FormulaGroupInterpreter::enableOpenCL(true);
}
+void ScOpenclTest::testCompilerHorizontal()
+{
+ if (!detectOpenCLDevice())
+ return;
+
+ ScDocShellRef xDocSh = loadDoc("opencl/compiler/horizontal.", ODS);
+ ScDocument* pDoc = xDocSh->GetDocument();
+ CPPUNIT_ASSERT(pDoc);
+ enableOpenCL();
+ pDoc->CalcAll();
+
+ ScDocShellRef xDocShRes = loadDoc("opencl/compiler/horizontal.", ODS);
+ ScDocument* pDocRes = xDocShRes->GetDocument();
+ CPPUNIT_ASSERT(pDocRes);
+ // Check the results of formula cells in the shared formula range.
+ for (SCROW i = 1; i < 5; ++i)
+ {
+ double fLibre = pDoc->GetValue(ScAddress(12, i, 0));
+ double fExcel = pDocRes->GetValue(ScAddress(12, i, 0));
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel));
+ fLibre = pDoc->GetValue(ScAddress(13, i, 0));
+ fExcel = pDocRes->GetValue(ScAddress(13, i, 0));
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel));
+ fLibre = pDoc->GetValue(ScAddress(14, i, 0));
+ fExcel = pDocRes->GetValue(ScAddress(14, i, 0));
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel));
+ }
+ xDocSh->DoClose();
+ xDocShRes->DoClose();
+}
void ScOpenclTest::testCompilerNested()
{
if (!detectOpenCLDevice())
commit f9b2e576407d139d0c67d54f03c05dca56dee4e0
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date: Mon Nov 18 14:46:30 2013 -0600
GPU Calc: implement horizontal ranges as multiple VectorRefs
AMLOEXT-242 Fix
Change-Id: Ia3deb221528230554b7c431e926b10428441666a
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index f6e6baf..224ed49 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -65,9 +65,10 @@ size_t VectorRef::Marshal(cl_kernel k, int argno, int, cl_program)
const formula::DoubleVectorRefToken* pDVR =
dynamic_cast< const formula::DoubleVectorRefToken* >(ref);
assert(pDVR);
- if (pDVR->GetArrays()[0].mpNumericArray == NULL)
+ if (pDVR->GetArrays()[mnIndex].mpNumericArray == NULL)
throw Unhandled();
- pHostBuffer = const_cast<double*>(pDVR->GetArrays()[0].mpNumericArray);
+ pHostBuffer = const_cast<double*>(
+ pDVR->GetArrays()[mnIndex].mpNumericArray);
szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
} else {
throw Unhandled();
@@ -281,8 +282,8 @@ class DynamicKernelStringArgument: public VectorRef
{
public:
DynamicKernelStringArgument(const std::string &s,
- FormulaTreeNodeRef ft):
- VectorRef(s, ft) {}
+ FormulaTreeNodeRef ft, int index = 0):
+ VectorRef(s, ft, index) {}
virtual void GenSlidingWindowFunction(std::stringstream &) {}
/// Generate declaration
@@ -319,7 +320,7 @@ size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int, cl_prog
dynamic_cast< const formula::DoubleVectorRefToken* >(ref);
assert(pDVR);
nStrings = pDVR->GetArrayLength();
- vRef = pDVR->GetArrays()[0];
+ vRef = pDVR->GetArrays()[mnIndex];
}
size_t szHostBuffer = nStrings * sizeof(cl_int);
// Marshal strings. Right now we pass hashes of these string
@@ -411,8 +412,9 @@ class DynamicKernelSlidingArgument: public Base
{
public:
DynamicKernelSlidingArgument(const std::string &s,
- FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen):
- Base(s, ft), mpCodeGen(CodeGen), mpClmem2(NULL)
+ FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
+ int index=0):
+ Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
{
FormulaToken *t = ft->GetFormulaToken();
if (t->GetType() != formula::svDoubleVectorRef)
@@ -491,9 +493,6 @@ public:
std::stringstream &ss, bool &needBody)
{
assert(mpDVR);
- // Do not handle horizontal double vectors yet
- if (mpDVR->GetArrays().size() > 1)
- throw Unhandled();
size_t nCurWindowSize = mpDVR->GetRefRowSize();
if (dynamic_cast<OpSum*>(mpCodeGen.get()))
{
@@ -528,6 +527,15 @@ public:
}
}
needBody = true;
+
+ // No need to generate a for-loop for degenerate cases
+ if (nCurWindowSize == 1)
+ {
+ ss << "if (gid0 <" << mpDVR->GetArrayLength();
+ ss << ")\n\t{\tint i = 0;\n\t\t";
+ return nCurWindowSize;
+ }
+
ss << "for (int i = ";
if (!bIsStartFixed && bIsEndFixed)
{
@@ -579,10 +587,10 @@ public:
size_t nInput = mpDVR->GetArrayLength();
size_t nCurWindowSize = mpDVR->GetRefRowSize();
// create clmem buffer
- if (mpDVR->GetArrays()[0].mpNumericArray == NULL)
+ if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
throw Unhandled();
double *pHostBuffer = const_cast<double*>(
- mpDVR->GetArrays()[0].mpNumericArray);
+ mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
size_t szHostBuffer = nInput * sizeof(double);
Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
(cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
@@ -1269,15 +1277,19 @@ DynamicKernelSoPArguments::DynamicKernelSoPArguments(
const formula::DoubleVectorRefToken* pDVR =
dynamic_cast< const formula::DoubleVectorRefToken* >(pChild);
assert(pDVR);
- if (pDVR->GetArrays()[0].mpNumericArray)
- mvSubArguments.push_back(
- SubArgument(new DynamicKernelSlidingArgument
- <VectorRef>(ts, ft->Children[i], mpCodeGen)));
- else
- mvSubArguments.push_back(
- SubArgument(new DynamicKernelSlidingArgument
- <DynamicKernelStringArgument>(
- ts, ft->Children[i], mpCodeGen)));
+ for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
+ {
+ if (pDVR->GetArrays()[j].mpNumericArray)
+ mvSubArguments.push_back(
+ SubArgument(new DynamicKernelSlidingArgument
+ <VectorRef>(
+ ts, ft->Children[i], mpCodeGen, j)));
+ else
+ mvSubArguments.push_back(
+ SubArgument(new DynamicKernelSlidingArgument
+ <DynamicKernelStringArgument>(
+ ts, ft->Children[i], mpCodeGen, j)));
+ }
} else if (pChild->GetType() == formula::svSingleVectorRef) {
const formula::SingleVectorRefToken* pSVR =
dynamic_cast< const formula::SingleVectorRefToken* >(pChild);
diff --git a/sc/source/core/opencl/opbase.cxx b/sc/source/core/opencl/opbase.cxx
index 41e5528..6bb866c 100644
--- a/sc/source/core/opencl/opbase.cxx
+++ b/sc/source/core/opencl/opbase.cxx
@@ -30,8 +30,16 @@ FormulaToken* DynamicKernelArgument::GetFormulaToken(void) const
return mFormulaTree->GetFormulaToken();
}
-VectorRef::VectorRef(const std::string &s, FormulaTreeNodeRef ft):
- DynamicKernelArgument(s, ft), mpClmem(NULL) {}
+VectorRef::VectorRef(const std::string &s, FormulaTreeNodeRef ft, int idx):
+ DynamicKernelArgument(s, ft), mpClmem(NULL), mnIndex(idx)
+{
+ if (mnIndex)
+ {
+ std::stringstream ss;
+ ss << mSymName << "s" << mnIndex;
+ mSymName = ss.str();
+ }
+}
VectorRef::~VectorRef()
{
diff --git a/sc/source/core/opencl/opbase.hxx b/sc/source/core/opencl/opbase.hxx
index 11b66df..d921119 100644
--- a/sc/source/core/opencl/opbase.hxx
+++ b/sc/source/core/opencl/opbase.hxx
@@ -103,7 +103,7 @@ public:
virtual bool NeedParallelReduction(void) const { return false; }
protected:
- const std::string mSymName;
+ std::string mSymName;
FormulaTreeNodeRef mFormulaTree;
};
@@ -115,12 +115,11 @@ protected:
class VectorRef : public DynamicKernelArgument
{
public:
- VectorRef(const std::string &s, FormulaTreeNodeRef ft);
+ VectorRef(const std::string &s, FormulaTreeNodeRef ft, int index = 0);
const std::string &GetNameAsString(void) const { return mSymName; }
/// Generate declaration
virtual void GenDecl(std::stringstream &ss) const;
-
/// When declared as input to a sliding window function
virtual void GenSlidingWindowDecl(std::stringstream &ss) const;
@@ -146,6 +145,8 @@ public:
protected:
// Used by marshaling
cl_mem mpClmem;
+ // index of this array within a double vector ref that holds multiple arrays
+ const int mnIndex;
};
/// Abstract class for code generation
More information about the Libreoffice-commits mailing list