[Libreoffice-commits] core.git: 3 commits - sc/source

Mon Jan 12 03:05:21 PST 2015

sc/source/core/opencl/formulagroupcl.cxx |  382 ++++++++++++++++++++++++++++---
 1 file changed, 356 insertions(+), 26 deletions(-)

New commits:
commit 226e367ff93542d82975d6ccd34448f68acf8035
Author: Tor Lillqvist <tml at collabora.com>
Date:   Mon Jan 12 13:03:02 2015 +0200

    SAL_INFO the setting of arguments to OpenCL kernels and enqueueing them
    
    Change-Id: Ia60194f9789324bc484bfa609c6eb92572b8554d

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index bf99b58..d407a79 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -156,6 +156,7 @@ size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
             pNanBuffer, 0, NULL, NULL);
     }
 
+    SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem);
     err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
     if (CL_SUCCESS != err)
         throw OpenCLError(err, __FILE__, __LINE__);
@@ -214,6 +215,7 @@ public:
         }
 
         // Pass the scalar result back to the rest of the formula kernel
+        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode);
         cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
@@ -263,6 +265,7 @@ public:
     {
         double tmp = GetDouble();
         // Pass the scalar result back to the rest of the formula kernel
+        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
         cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
@@ -303,6 +306,7 @@ public:
     {
         double tmp = 0.0;
         // Pass the scalar result back to the rest of the formula kernel
+        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
         cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
@@ -662,6 +666,7 @@ threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
     {
         cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32);
         // Pass the scalar result back to the rest of the formula kernel
+        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed);
         cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), (void*)&seed);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
@@ -770,6 +775,7 @@ size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_pro
     if (CL_SUCCESS != err)
         throw OpenCLError(err, __FILE__, __LINE__);
 
+    SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem);
     err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
     if (CL_SUCCESS != err)
         throw OpenCLError(err, __FILE__, __LINE__);
@@ -1414,19 +1420,23 @@ public:
         // set kernel arg of reduction kernel
         // TODO(Wei Wei): use unique name for kernel
         cl_mem buf = Base::GetCLBuffer();
+        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
         err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
             (void*)&buf);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
 
+        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
         err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void*)&mpClmem2);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
 
+        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
         err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
 
+        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
         err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
@@ -1434,6 +1444,7 @@ public:
         // set work group size and execute
         size_t global_work_size[] = { 256, (size_t)w };
         size_t local_work_size[] = { 256, 1 };
+        SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
         err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
             global_work_size, local_work_size, 0, NULL, NULL);
         if (CL_SUCCESS != err)
@@ -1465,19 +1476,23 @@ public:
                 throw OpenCLError(err, __FILE__, __LINE__);
             // set kernel arg of reduction kernel
             buf = Base::GetCLBuffer();
+            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
             err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
                 (void*)&buf);
             if (CL_SUCCESS != err)
                 throw OpenCLError(err, __FILE__, __LINE__);
 
+            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
             err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void*)&mpClmem2);
             if (CL_SUCCESS != err)
                 throw OpenCLError(err, __FILE__, __LINE__);
 
+            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
             err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
             if (CL_SUCCESS != err)
                 throw OpenCLError(err, __FILE__, __LINE__);
 
+            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
             err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
             if (CL_SUCCESS != err)
                 throw OpenCLError(err, __FILE__, __LINE__);
@@ -1485,6 +1500,7 @@ public:
             // set work group size and execute
             size_t global_work_size1[] = { 256, (size_t)w };
             size_t local_work_size1[] = { 256, 1 };
+            SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
             err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
                 global_work_size1, local_work_size1, 0, NULL, NULL);
             if (CL_SUCCESS != err)
@@ -1514,6 +1530,7 @@ public:
                 throw OpenCLError(err, __FILE__, __LINE__);
         }
         // set kernel arg
+        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
         err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&(mpClmem2));
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
@@ -2139,12 +2156,14 @@ public:
                 // set kernel arg of reduction kernel
                 for (size_t j = 0; j < vclmem.size(); j++)
                 {
+                    SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]);
                     err = clSetKernelArg(redKernel, j,
                         vclmem[j] ? sizeof(cl_mem) : sizeof(double),
                         (void*)&vclmem[j]);
                     if (CL_SUCCESS != err)
                         throw OpenCLError(err, __FILE__, __LINE__);
                 }
+                SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2);
                 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void*)&pClmem2);
                 if (CL_SUCCESS != err)
                     throw OpenCLError(err, __FILE__, __LINE__);
@@ -2152,6 +2171,7 @@ public:
                 // set work group size and execute
                 size_t global_work_size[] = { 256, (size_t)nVectorWidth };
                 size_t local_work_size[] = { 256, 1 };
+                SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
                 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
                     global_work_size, local_work_size, 0, NULL, NULL);
                 if (CL_SUCCESS != err)
@@ -2161,6 +2181,7 @@ public:
                     throw OpenCLError(err, __FILE__, __LINE__);
 
                 // Pass pClmem2 to the "real" kernel
+                SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2);
                 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&pClmem2);
                 if (CL_SUCCESS != err)
                     throw OpenCLError(err, __FILE__, __LINE__);
@@ -2206,6 +2227,10 @@ public:
                 // set kernel arg of reduction kernel
                 for (size_t j = 0; j < vclmem.size(); j++)
                 {
+                    if (vclmem[j].mCLMem)
+                        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
+                    else
+                        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst);
                     err = clSetKernelArg(redKernel, j,
                         vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
                         vclmem[j].mCLMem ? (void*)&vclmem[j].mCLMem :
@@ -2213,20 +2238,24 @@ public:
                     if (CL_SUCCESS != err)
                         throw OpenCLError(err, __FILE__, __LINE__);
                 }
+                SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
                 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void*)&mpClmem2);
                 if (CL_SUCCESS != err)
                     throw OpenCLError(err, __FILE__, __LINE__);
 
+                SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
                 err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), (void*)&nInput);
                 if (CL_SUCCESS != err)
                     throw OpenCLError(err, __FILE__, __LINE__);
 
+                SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
                 err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), (void*)&nCurWindowSize);
                 if (CL_SUCCESS != err)
                     throw OpenCLError(err, __FILE__, __LINE__);
                 // set work group size and execute
                 size_t global_work_size[] = { 256, (size_t)nVectorWidth };
                 size_t local_work_size[] = { 256, 1 };
+                SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
                 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
                     global_work_size, local_work_size, 0, NULL, NULL);
                 if (CL_SUCCESS != err)
@@ -2236,6 +2265,7 @@ public:
                     throw OpenCLError(err, __FILE__, __LINE__);
                 clReleaseKernel(redKernel);
                 // Pass mpClmem2 to the "real" kernel
+                SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
                 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem2);
                 if (CL_SUCCESS != err)
                     throw OpenCLError(err, __FILE__, __LINE__);
@@ -3712,12 +3742,14 @@ void DynamicKernel::Launch( size_t nr )
         nr * sizeof(double), NULL, &err);
     if (CL_SUCCESS != err)
         throw OpenCLError(err, __FILE__, __LINE__);
+    SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem);
     err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), (void*)&mpResClmem);
     if (CL_SUCCESS != err)
         throw OpenCLError(err, __FILE__, __LINE__);
     // The rest of buffers
     mSyms.Marshal(mpKernel, nr, mpProgram);
     size_t global_work_size[] = { nr };
+    SAL_INFO("sc.opencl", "Enqueing kernel " << mpKernel);
     err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL,
         global_work_size, NULL, 0, NULL, NULL);
     if (CL_SUCCESS != err)
commit 4200d7300ec9355e0d648d7053484901b5dd2009
Author: Tor Lillqvist <tml at collabora.com>
Date:   Mon Jan 12 12:41:46 2015 +0200

    Rewrite the RAND() OpenCL implementation to actually be random
    
    Use a so-called counter-based random number generator. Code from Random123,
    http://www.deshawresearch.com/resources_random123.html.
    
    Change-Id: Id47f84ef18eada64dcf47762a61ec3856c71760e

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index f0cfd930..bf99b58 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -16,6 +16,7 @@
 #include "tokenarray.hxx"
 #include "compiler.hxx"
 #include "interpre.hxx"
+#include <comphelper/random.hxx>
 #include <formula/vectortoken.hxx>
 #include "scmatrix.hxx"
 
@@ -326,31 +327,331 @@ public:
     }
     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
     {
-        GenDecl(ss);
+        ss << "int " << mSymName;
     }
     virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
     {
-        return mSymName + "_Random()";
+        return mSymName + "_Random(" + mSymName + ")";
     }
     virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
     {
-        ss << "\ndouble " << mSymName;
-        ss << "_Random ()\n{\n";
-        ss << "    int i, gid0=get_global_id(0);;\n";
-        ss << "    double tmp = 0;\n";
-        ss << "    double M = 2147483647;\n";
-        ss << "    double Lamda = 32719;\n";
-        ss << "    double f;\n";
-        ss << "    f = gid0 + 1;\n";
-        ss << "    int k;\n";
-        ss << "    for(i = 1;i <= 100; ++i){\n";
-        ss << "        f = Lamda * f;\n";
-        ss << "        k = (int)(f * pow(M,-1.0));\n";
-        ss << "        f = f - M * k;\n";
-        ss << "    }\n";
-        ss << "    tmp = f * pow(M,-1.0);\n";
-        ss << "    return tmp;\n";
-        ss << "}";
+        // This string is from the pi_opencl_kernel.i file as
+        // generated when building the Random123 examples. Unused
+        // stuff has been removed, and the actual kernel is not the
+        // same as in the totally different use case of that example,
+        // of course. Only the code that calculates the counter-based
+        // random number and what it needs is left.
+        ss << "\
+/*\n\
+Copyright 2010-2011, D. E. Shaw Research.\n\
+All rights reserved.\n\
+\n\
+Redistribution and use in source and binary forms, with or without\n\
+modification, are permitted provided that the following conditions are\n\
+met:\n\
+\n\
+* Redistributions of source code must retain the above copyright\n\
+  notice, this list of conditions, and the following disclaimer.\n\
+\n\
+* Redistributions in binary form must reproduce the above copyright\n\
+  notice, this list of conditions, and the following disclaimer in the\n\
+  documentation and/or other materials provided with the distribution.\n\
+\n\
+* Neither the name of D. E. Shaw Research nor the names of its\n\
+  contributors may be used to endorse or promote products derived from\n\
+  this software without specific prior written permission.\n\
+\n\
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
+\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
+*/\n\
+\n\
+typedef uint uint32_t;\n\
+struct r123array2x32\n\
+{\n\
+  uint32_t v[2];\n\
+};\n\
+enum r123_enum_threefry32x2\n\
+{\n\
+  R_32x2_0_0 = 13,\n\
+  R_32x2_1_0 = 15,\n\
+  R_32x2_2_0 = 26,\n\
+  R_32x2_3_0 = 6,\n\
+  R_32x2_4_0 = 17,\n\
+  R_32x2_5_0 = 29,\n\
+  R_32x2_6_0 = 16,\n\
+  R_32x2_7_0 = 24\n\
+};\n\
+inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
+  __attribute__ ((always_inline));\n\
+inline uint32_t\n\
+RotL_32 (uint32_t x, unsigned int N)\n\
+{\n\
+  return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
+}\n\
+\n\
+typedef struct r123array2x32 threefry2x32_ctr_t;\n\
+typedef struct r123array2x32 threefry2x32_key_t;\n\
+typedef struct r123array2x32 threefry2x32_ukey_t;\n\
+inline threefry2x32_key_t\n\
+threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
+{\n\
+  return uk;\n\
+}\n\
+\n\
+inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
+                      threefry2x32_ctr_t in,\n\
+                      threefry2x32_key_t k)\n\
+  __attribute__ ((always_inline));\n\
+inline threefry2x32_ctr_t\n\
+threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
+        threefry2x32_key_t k)\n\
+{\n\
+  threefry2x32_ctr_t X;\n\
+  uint32_t ks[2 + 1];\n\
+  int i;\n\
+  ks[2] = 0x1BD11BDA;\n\
+  for (i = 0; i < 2; i++) {\n\
+    ks[i] = k.v[i];\n\
+    X.v[i] = in.v[i];\n\
+    ks[2] ^= k.v[i];\n\
+  }\n\
+  X.v[0] += ks[0];\n\
+  X.v[1] += ks[1];\n\
+  if (Nrounds > 0) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 1) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 2) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 3) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 3) {\n\
+    X.v[0] += ks[1];\n\
+    X.v[1] += ks[2];\n\
+    X.v[1] += 1;\n\
+  }\n\
+  if (Nrounds > 4) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 5) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 6) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 7) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 7) {\n\
+    X.v[0] += ks[2];\n\
+    X.v[1] += ks[0];\n\
+    X.v[1] += 2;\n\
+  }\n\
+  if (Nrounds > 8) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 9) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 10) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 11) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 11) {\n\
+    X.v[0] += ks[0];\n\
+    X.v[1] += ks[1];\n\
+    X.v[1] += 3;\n\
+  }\n\
+  if (Nrounds > 12) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 13) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 14) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 15) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 15) {\n\
+    X.v[0] += ks[1];\n\
+    X.v[1] += ks[2];\n\
+    X.v[1] += 4;\n\
+  }\n\
+  if (Nrounds > 16) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 17) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 18) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 19) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 19) {\n\
+    X.v[0] += ks[2];\n\
+    X.v[1] += ks[0];\n\
+    X.v[1] += 5;\n\
+  }\n\
+  if (Nrounds > 20) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 21) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 22) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 23) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 23) {\n\
+    X.v[0] += ks[0];\n\
+    X.v[1] += ks[1];\n\
+    X.v[1] += 6;\n\
+  }\n\
+  if (Nrounds > 24) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 25) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 26) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 27) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 27) {\n\
+    X.v[0] += ks[1];\n\
+    X.v[1] += ks[2];\n\
+    X.v[1] += 7;\n\
+  }\n\
+  if (Nrounds > 28) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 29) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 30) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 31) {\n\
+    X.v[0] += X.v[1];\n\
+    X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
+    X.v[1] ^= X.v[0];\n\
+  }\n\
+  if (Nrounds > 31) {\n\
+    X.v[0] += ks[2];\n\
+    X.v[1] += ks[0];\n\
+    X.v[1] += 8;\n\
+  }\n\
+  return X;\n\
+}\n\
+\n\
+enum r123_enum_threefry2x32\n\
+{ threefry2x32_rounds = 20 };\n\
+inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
+                    threefry2x32_key_t k)\n\
+  __attribute__ ((always_inline));\n\
+inline threefry2x32_ctr_t\n\
+threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
+{\n\
+  return threefry2x32_R (threefry2x32_rounds, in, k);\n\
+}\n\
+\n\
+";
+        ss << "double " << mSymName << "_Random (int seed)\n\
+{\n\
+  unsigned tid = get_global_id(0);\n\
+  threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
+  threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
+  c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
+  const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
+  const double halffactor = 0.5*factor;\n\
+  return c.v[0] * factor + halffactor;\n\
+}\n\
+";
     }
     virtual size_t GetWindowSize() const SAL_OVERRIDE
     {
@@ -359,9 +660,9 @@ public:
     /// Create buffer and pass the buffer to a given kernel
     virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
     {
-        double tmp = 0.0;
+        cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32);
         // Pass the scalar result back to the rest of the formula kernel
-        cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
+        cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), (void*)&seed);
         if (CL_SUCCESS != err)
             throw OpenCLError(err, __FILE__, __LINE__);
         return 1;
commit 41d40b3a0090f71cef74145da3af2118ab504e59
Author: Tor Lillqvist <tml at collabora.com>
Date:   Fri Jan 9 10:42:28 2015 +0200

    Surely any C++ programmer knows what a forward declaration is
    
    Change-Id: I12d230176ef1ea232ac9a401fbbebce6d8c058a7

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index a327d1f..f0cfd930 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -546,11 +546,8 @@ protected:
 /// Handling a Double Vector that is used as a sliding window input
 /// to either a sliding window average or sum-of-products
 /// Generate a sequential loop for reductions
-class OpSum; // Forward Declaration
-class OpAverage; // Forward Declaration
-class OpMin; // Forward Declaration
-class OpMax; // Forward Declaration
-class OpCount; // Forward Declaration
+class OpAverage;
+class OpCount;
 
 template<class Base>
 class DynamicKernelSlidingArgument : public Base