[Libreoffice-commits] core.git: sc/source
I-Jui Sung (Ray)
ray at multicorewareinc.com
Tue Nov 19 16:07:42 PST 2013
sc/source/core/opencl/formulagroupcl.cxx | 290 +++++++++++++++++++------------
1 file changed, 186 insertions(+), 104 deletions(-)
New commits:
commit 2c39e778873f10037721d844697962dc41e3bcc3
Author: I-Jui (Ray) Sung <ray at multicorewareinc.com>
Date: Tue Nov 19 16:42:56 2013 -0600
GPU Calc: separate out parallel reduction from DynamicKernelSlidingArgument
Create a new class ParallelReductionVectorRef to straighten out code
generation and marshaling logic between sequential and parallel code
generation alternatives.
Change-Id: Id029ad441f80712f8e7396dcd985e3363ce08ff8
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index abd3230..388605c 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -409,6 +409,7 @@ protected:
/// Handling a Double Vector that is used as a sliding window input
/// to either a sliding window average or sum-of-products
+/// Generate a sequential loop for reductions
class OpSum; // Forward Declaration
class OpAverage; // Forward Declaration
class OpMin; // Forward Declaration
@@ -430,79 +431,8 @@ public:
bIsStartFixed = mpDVR->IsStartFixed();
bIsEndFixed = mpDVR->IsEndFixed();
}
- virtual bool NeedParallelReduction(void) const
- {
- if ((dynamic_cast<OpSum*>(mpCodeGen.get())
- && !dynamic_cast<OpAverage*>(mpCodeGen.get())) ||
- dynamic_cast<OpMin*>(mpCodeGen.get()) ||
- dynamic_cast<OpMax*>(mpCodeGen.get()) ||
- dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
- return GetWindowSize()> 100 &&
- ( (GetStartFixed() && GetEndFixed()) ||
- (!GetStartFixed() && !GetEndFixed()) ) ;
- else
- return false;
- }
- virtual void GenSlidingWindowFunction(std::stringstream &ss) {
- if (!dynamic_cast<OpSumIfs*>(mpCodeGen.get())
- && NeedParallelReduction())
- {
- std::string name = Base::GetName();
- ss << "__kernel void "<<name;
- ss << "_reduction(__global double* A, "
- "__global double *result,int arrayLength,int windowSize){\n";
- ss << " double tmp, current_result =" <<
- mpCodeGen->GetBottom();
- ss << ";\n";
- ss << " int writePos = get_group_id(1);\n";
- ss << " int lidx = get_local_id(0);\n";
- ss << " __local double shm_buf[256];\n";
- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
- ss << " int offset = 0;\n";
- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
- ss << " int offset = get_group_id(1);\n";
- else
- throw Unhandled();
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
- ss << " int loop = arrayLength/512 + 1;\n";
- ss << " for (int l=0; l<loop; l++){\n";
- ss << " tmp = "<< mpCodeGen->GetBottom() << ";\n";
- ss << " int loopOffset = l*512;\n";
- ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
- ss << " tmp = ";
- ss << mpCodeGen->Gen2(
- std::string(
- "legalize(A[loopOffset + lidx + offset], ")+
- mpCodeGen->GetBottom() +")",
- std::string(
- "legalize(A[loopOffset + lidx + offset + 256], ")+
- mpCodeGen->GetBottom() +")"
- );
- ss << ";";
- ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
- ss << " tmp = legalize(A[loopOffset + lidx + offset],";
- ss << mpCodeGen->GetBottom() << ");\n";
- ss << " shm_buf[lidx] = tmp;\n";
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
- ss << " for (int i = 128; i >0; i/=2) {\n";
- ss << " if (lidx < i)\n";
- ss << " shm_buf[lidx] = ";
- ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]");
- ss << ";";
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
- ss << " }\n";
- ss << " if (lidx == 0)\n";
- ss << " current_result =";
- ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
- ss << ";\n";
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
- ss << " }\n";
- ss << " if (lidx == 0)\n";
- ss << " result[writePos] = current_result;\n";
- ss << "}\n";
- }
- }
+ virtual void GenSlidingWindowFunction(std::stringstream &) {}
virtual std::string GenSlidingWindowDeclRef(bool=false) const
{
@@ -519,26 +449,7 @@ public:
{
assert(mpDVR);
size_t nCurWindowSize = mpDVR->GetRefRowSize();
- if (!dynamic_cast<OpSumIfs*>(mpCodeGen.get())
- && NeedParallelReduction())
- {
- if ((!bIsStartFixed && !bIsEndFixed) ||
- (bIsStartFixed && bIsEndFixed))
- {
- // set 100 as a temporary threshold for invoking reduction
- // kernel in NeedParalleLReduction function
- if (NeedParallelReduction())
- {
- std::string temp = Base::GetName() + "[gid0]";
- ss << "tmp = ";
- ss << mpCodeGen->Gen2(temp, "tmp");
- ss << ";\n\t";
- needBody = false;
- return nCurWindowSize;
- }
- }
- }
-// original for loop
+ // original for loop
#ifndef UNROLLING
needBody = true;
// No need to generate a for-loop for degenerated cases
@@ -586,8 +497,6 @@ public:
return nCurWindowSize;
#endif
-
-
#ifdef UNROLLING
{
if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) {
@@ -709,13 +618,134 @@ return nCurWindowSize;
}
#endif
}
+ ~DynamicKernelSlidingArgument()
+ {
+ if (mpClmem2)
+ {
+ clReleaseMemObject(mpClmem2);
+ mpClmem2 = NULL;
+ }
+ }
- virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
+ size_t GetArrayLength(void) const {return mpDVR->GetArrayLength(); }
+
+ size_t GetWindowSize(void) const {return mpDVR->GetRefRowSize(); }
+
+ size_t GetStartFixed(void) const {return bIsStartFixed; }
+
+ size_t GetEndFixed(void) const {return bIsEndFixed; }
+
+protected:
+ bool bIsStartFixed, bIsEndFixed;
+ const formula::DoubleVectorRefToken *mpDVR;
+ // from parent nodes
+ boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
+ // controls whether to invoke the reduction kernel during marshaling or not
+ cl_mem mpClmem2;
+};
+
+/// Handling a Double Vector that is used as a sliding window input
+/// Performs parallel reduction based on given operator
+template<class Base>
+class ParallelReductionVectorRef: public Base
+{
+public:
+ ParallelReductionVectorRef(const std::string &s,
+ FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
+ int index=0):
+ Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
+ {
+ FormulaToken *t = ft->GetFormulaToken();
+ if (t->GetType() != formula::svDoubleVectorRef)
+ throw Unhandled();
+ mpDVR = dynamic_cast<const formula::DoubleVectorRefToken *>(t);
+ assert(mpDVR);
+ bIsStartFixed = mpDVR->IsStartFixed();
+ bIsEndFixed = mpDVR->IsEndFixed();
+ }
+ /// Emit the definition for the auxiliary reduction kernel
+ virtual void GenSlidingWindowFunction(std::stringstream &ss) {
+ std::string name = Base::GetName();
+ ss << "__kernel void "<<name;
+ ss << "_reduction(__global double* A, "
+ "__global double *result,int arrayLength,int windowSize){\n";
+ ss << " double tmp, current_result =" <<
+ mpCodeGen->GetBottom();
+ ss << ";\n";
+ ss << " int writePos = get_group_id(1);\n";
+ ss << " int lidx = get_local_id(0);\n";
+ ss << " __local double shm_buf[256];\n";
+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+ ss << " int offset = 0;\n";
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+ ss << " int offset = get_group_id(1);\n";
+ else
+ throw Unhandled();
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " int loop = arrayLength/512 + 1;\n";
+ ss << " for (int l=0; l<loop; l++){\n";
+ ss << " tmp = "<< mpCodeGen->GetBottom() << ";\n";
+ ss << " int loopOffset = l*512;\n";
+ ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
+ ss << " tmp = ";
+ ss << mpCodeGen->Gen2(
+ std::string(
+ "legalize(A[loopOffset + lidx + offset], ")+
+ mpCodeGen->GetBottom() +")",
+ std::string(
+ "legalize(A[loopOffset + lidx + offset + 256], ")+
+ mpCodeGen->GetBottom() +")"
+ );
+ ss << ";\n";
+ ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
+ ss << " tmp = legalize(A[loopOffset + lidx + offset],";
+ ss << mpCodeGen->GetBottom() << ");\n";
+ ss << " shm_buf[lidx] = tmp;\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " for (int i = 128; i >0; i/=2) {\n";
+ ss << " if (lidx < i)\n";
+ ss << " shm_buf[lidx] = ";
+ ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]");
+ ss << ";";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " }\n";
+ ss << " if (lidx == 0)\n";
+ ss << " current_result =";
+ ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
+ ss << ";\n";
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ ss << " }\n";
+ ss << " if (lidx == 0)\n";
+ ss << " result[writePos] = current_result;\n";
+ ss << "}\n";
+ }
+
+
+ virtual std::string GenSlidingWindowDeclRef(bool=false) const
+ {
+ std::stringstream ss;
+ if (!bIsStartFixed && !bIsEndFixed)
+ ss << Base::GetName() << "[i + gid0]";
+ else
+ ss << Base::GetName() << "[i]";
+ return ss.str();
+ }
+ /// Controls how the elements in the DoubleVectorRef are traversed
+ virtual size_t GenReductionLoopHeader(
+ std::stringstream &ss, bool &needBody)
{
- if (!NeedParallelReduction() ||
- dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
- return Base::Marshal(k, argno, w, mpProgram);
+ assert(mpDVR);
+ size_t nCurWindowSize = mpDVR->GetRefRowSize();
+ std::string temp = Base::GetName() + "[gid0]";
+ ss << "tmp = ";
+ ss << mpCodeGen->Gen2(temp, "tmp");
+ ss << ";\n\t";
+ needBody = false;
+ return nCurWindowSize;
+ }
+ virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
+ {
assert(Base::mpClmem == NULL);
// Obtain cl context
KernelEnv kEnv;
@@ -780,7 +810,7 @@ return nCurWindowSize;
throw OpenCLError(err);
return 1;
}
- ~DynamicKernelSlidingArgument()
+ ~ParallelReductionVectorRef()
{
if (mpClmem2)
{
@@ -806,6 +836,8 @@ protected:
cl_mem mpClmem2;
};
+
+
/// Abstract class for code generation
class Reduction: public SlidingFunctionBase
@@ -813,6 +845,7 @@ class Reduction: public SlidingFunctionBase
public:
typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
+ typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
virtual void GenSlidingWindowFunction(std::stringstream &ss,
const std::string sSymName, SubArguments &vSubArguments)
@@ -834,13 +867,23 @@ public:
size_t nItems = 0;
while (i--)
{
- if (NumericRange *NR = dynamic_cast<NumericRange *> (vSubArguments[i].get()))
+ if (NumericRange *NR =
+ dynamic_cast<NumericRange *> (vSubArguments[i].get()))
{
bool needBody;
nItems += NR->GenReductionLoopHeader(ss, needBody);
if (needBody == false) continue;
}
- else if (StringRange *SR = dynamic_cast<StringRange *> (vSubArguments[i].get()))
+ else if (ParallelNumericRange *PNR =
+ dynamic_cast<ParallelNumericRange *> (vSubArguments[i].get()))
+ {
+ //did not handle yet
+ bool needBody;
+ nItems += PNR->GenReductionLoopHeader(ss, needBody);
+ if (needBody == false) continue;
+ }
+ else if (StringRange *SR =
+ dynamic_cast<StringRange *> (vSubArguments[i].get()))
{
//did not handle yet
bool needBody;
@@ -1497,6 +1540,46 @@ boost::shared_ptr<DynamicKernelArgument> SoPHelper(
return boost::shared_ptr<DynamicKernelArgument>(new DynamicKernelSoPArguments(ts, ft, pCodeGen));
}
+template<class Base>
+DynamicKernelArgument *VectorRefFactory(const std::string &s,
+ const FormulaTreeNodeRef& ft,
+ boost::shared_ptr<SlidingFunctionBase> &pCodeGen,
+ int index)
+{
+ //Black lists ineligible classes here ..
+ // SUMIFS does not perform parallel reduction at DoubleVectorRef level
+ if (dynamic_cast<OpSumIfs*>(pCodeGen.get())) {
+ return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
+ }
+ // AVERAGE is not supported yet
+ else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
+ {
+ return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
+ }
+ // COUNT is not supported yet
+ else if (dynamic_cast<OpCount*>(pCodeGen.get()))
+ {
+ return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
+ }
+ // Only child class of Reduction is supported
+ else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
+ {
+ return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
+ }
+
+ const formula::DoubleVectorRefToken* pDVR =
+ dynamic_cast< const formula::DoubleVectorRefToken* >(
+ ft->GetFormulaToken());
+ // Window being too small to justify a parallel reduction
+ if (pDVR->GetRefRowSize() < 100)
+ return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
+ if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
+ (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
+ return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
+ else // Other cases are not supported as well
+ return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
+}
+
DynamicKernelSoPArguments::DynamicKernelSoPArguments(
const std::string &s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen) :
DynamicKernelArgument(s, ft), mpCodeGen(pCodeGen)
@@ -1523,12 +1606,11 @@ DynamicKernelSoPArguments::DynamicKernelSoPArguments(
{
if (pDVR->GetArrays()[j].mpNumericArray)
mvSubArguments.push_back(
- SubArgument(new DynamicKernelSlidingArgument
- <VectorRef>(
+ SubArgument(VectorRefFactory<VectorRef>(
ts, ft->Children[i], mpCodeGen, j)));
else
mvSubArguments.push_back(
- SubArgument(new DynamicKernelSlidingArgument
+ SubArgument(VectorRefFactory
<DynamicKernelStringArgument>(
ts, ft->Children[i], mpCodeGen, j)));
}
More information about the Libreoffice-commits
mailing list