[Libreoffice-commits] core.git: 83 commits - sc/CppunitTest_sc_ucalc.mk sc/inc sc/Library_sc.mk sc/qa sc/source

Kohei Yoshida kohei.yoshida at gmail.com
Wed Jul 24 22:16:51 PDT 2013


 sc/CppunitTest_sc_ucalc.mk               |    1 
 sc/Library_sc.mk                         |    1 
 sc/inc/address.hxx                       |    9 
 sc/inc/calcmacros.hxx                    |    6 
 sc/inc/colorscale.hxx                    |    2 
 sc/inc/column.hxx                        |   24 
 sc/inc/compiler.hxx                      |    6 
 sc/inc/document.hxx                      |    9 
 sc/inc/formulacell.hxx                   |   23 
 sc/inc/refdata.hxx                       |  182 --
 sc/inc/reftokenhelper.hxx                |    9 
 sc/inc/refupdatecontext.hxx              |   64 
 sc/inc/scopetools.hxx                    |    9 
 sc/inc/table.hxx                         |   28 
 sc/inc/token.hxx                         |   23 
 sc/inc/tokenarray.hxx                    |   41 
 sc/qa/unit/helper/qahelper.hxx           |    3 
 sc/qa/unit/ucalc.cxx                     |  983 -----------
 sc/qa/unit/ucalc.hxx                     |   22 
 sc/qa/unit/ucalc_formula.cxx             | 1749 +++++++++++++++++++
 sc/source/core/data/colorscale.cxx       |   30 
 sc/source/core/data/column.cxx           |  117 -
 sc/source/core/data/column2.cxx          |    7 
 sc/source/core/data/column3.cxx          |   26 
 sc/source/core/data/conditio.cxx         |    4 
 sc/source/core/data/documen2.cxx         |   20 
 sc/source/core/data/documen3.cxx         |  164 +
 sc/source/core/data/documen4.cxx         |   52 
 sc/source/core/data/document.cxx         |  109 -
 sc/source/core/data/formulacell.cxx      |  797 +++++---
 sc/source/core/data/formulaiter.cxx      |   17 
 sc/source/core/data/refupdatecontext.cxx |   34 
 sc/source/core/data/table1.cxx           |   25 
 sc/source/core/data/table2.cxx           |   20 
 sc/source/core/data/table5.cxx           |    5 
 sc/source/core/data/validat.cxx          |    3 
 sc/source/core/inc/refupdat.hxx          |   42 
 sc/source/core/opencl/formulagroupcl.cxx |  337 ++-
 sc/source/core/opencl/oclkernels.hxx     |  172 -
 sc/source/core/opencl/openclwrapper.cxx  | 2751 ++++++++++++-------------------
 sc/source/core/opencl/openclwrapper.hxx  |  185 --
 sc/source/core/tool/address.cxx          |   18 
 sc/source/core/tool/chartlis.cxx         |    8 
 sc/source/core/tool/chgtrack.cxx         |   35 
 sc/source/core/tool/compiler.cxx         |  481 ++---
 sc/source/core/tool/consoli.cxx          |   16 
 sc/source/core/tool/detfunc.cxx          |    8 
 sc/source/core/tool/interpr1.cxx         |   22 
 sc/source/core/tool/interpr2.cxx         |   43 
 sc/source/core/tool/interpr4.cxx         |  173 -
 sc/source/core/tool/rangenam.cxx         |   50 
 sc/source/core/tool/refdata.cxx          |  331 ++-
 sc/source/core/tool/reftokenhelper.cxx   |   83 
 sc/source/core/tool/refupdat.cxx         |  375 ++--
 sc/source/core/tool/scopetools.cxx       |   11 
 sc/source/core/tool/token.cxx            |  645 +++++--
 sc/source/filter/excel/excform.cxx       |   53 
 sc/source/filter/excel/excform8.cxx      |   73 
 sc/source/filter/excel/xeformula.cxx     |   39 
 sc/source/filter/excel/xelink.cxx        |   35 
 sc/source/filter/excel/xichart.cxx       |    2 
 sc/source/filter/excel/xiname.cxx        |    4 
 sc/source/filter/inc/xelink.hxx          |    4 
 sc/source/filter/lotus/lotform.cxx       |   31 
 sc/source/filter/qpro/qproform.cxx       |   30 
 sc/source/ui/miscdlgs/anyrefdg.cxx       |    7 
 sc/source/ui/unoobj/chart2uno.cxx        |   42 
 sc/source/ui/vba/vbanames.cxx            |    2 
 sc/source/ui/view/viewfun6.cxx           |    8 
 69 files changed, 6059 insertions(+), 4681 deletions(-)

New commits:
commit e0e236c49c441b43d7c0a19077250a3016be4c64
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Wed Jul 24 22:28:01 2013 -0400

    These static_cast's are very significant. Don't remove these.
    
    This fixes the shared formula import from xls.
    
    Change-Id: I8790e642c3ef1a335694891eb574a68ae5270649

diff --git a/sc/qa/unit/filters-test.cxx b/sc/qa/unit/filters-test.cxx
index 6b4419c..1d7eeca 100644
--- a/sc/qa/unit/filters-test.cxx
+++ b/sc/qa/unit/filters-test.cxx
@@ -84,8 +84,8 @@ public:
     CPPUNIT_TEST(testContentXLSX);
     CPPUNIT_TEST(testContentLotus123);
     CPPUNIT_TEST(testContentDIF);
-//  CPPUNIT_TEST(testSharedFormulaXLS);
-//  CPPUNIT_TEST(testSharedFormulaXLSX);
+    CPPUNIT_TEST(testSharedFormulaXLS);
+    CPPUNIT_TEST(testSharedFormulaXLSX);
     CPPUNIT_TEST(testLegacyCellAnchoredRotatedShape);
 
 #if TEST_BUG_FILES
diff --git a/sc/source/filter/excel/excform8.cxx b/sc/source/filter/excel/excform8.cxx
index 862584e..4810a7e 100644
--- a/sc/source/filter/excel/excform8.cxx
+++ b/sc/source/filter/excel/excform8.cxx
@@ -1433,9 +1433,9 @@ void ExcelToSc8::ExcRelToScRel8( sal_uInt16 nRow, sal_uInt16 nC, ScSingleRefData
     {
         // C O L
         if( bColRel )
-            rSRD.SetRelCol(nC);
+            rSRD.SetRelCol(static_cast<SCCOL>(static_cast<sal_Int8>(nC)));
         else
-            rSRD.SetAbsCol(nCol);
+            rSRD.SetAbsCol(static_cast<SCCOL>(nCol));
 
         // R O W
         if( bRowRel )
commit e481282f576c46e02c836524dd000cadca14091a
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Wed Jul 24 20:32:18 2013 -0400

    Work on removing direct access to ScSingleRefData's data members.
    
    This broke the shared formula import from xls and xlsx. Disabling the
    tests for now.
    
    Change-Id: I75d802b00947b21083db19b5c07204a0c3d4f369

diff --git a/sc/inc/refdata.hxx b/sc/inc/refdata.hxx
index ccbc1c5..629e8ae 100644
--- a/sc/inc/refdata.hxx
+++ b/sc/inc/refdata.hxx
@@ -65,6 +65,13 @@ struct SC_DLLPUBLIC ScSingleRefData
     inline  void SetTabRel( bool bVal ) { Flags.bTabRel = (bVal ? true : false ); }
     inline  bool IsTabRel() const       { return Flags.bTabRel; }
 
+    void SetAbsCol( SCCOL nVal );
+    void SetRelCol( SCCOL nVal );
+    void SetAbsRow( SCROW nVal );
+    void SetRelRow( SCROW nVal );
+    void SetAbsTab( SCTAB nVal );
+    void SetRelTab( SCTAB nVal );
+
     void SetColDeleted( bool bVal );
     bool IsColDeleted() const;
     void SetRowDeleted( bool bVal );
@@ -84,9 +91,9 @@ struct SC_DLLPUBLIC ScSingleRefData
 
     ScAddress toAbs( const ScAddress& rPos ) const;
     void SetAddress( const ScAddress& rAddr, const ScAddress& rPos );
-    SCROW GetRow() const;
-    SCCOL GetCol() const;
-    SCTAB GetTab() const;
+    SCROW Row() const;
+    SCCOL Col() const;
+    SCTAB Tab() const;
 
             bool operator==( const ScSingleRefData& ) const;
             bool operator!=( const ScSingleRefData& ) const;
diff --git a/sc/qa/unit/filters-test.cxx b/sc/qa/unit/filters-test.cxx
index 1d7eeca..6b4419c 100644
--- a/sc/qa/unit/filters-test.cxx
+++ b/sc/qa/unit/filters-test.cxx
@@ -84,8 +84,8 @@ public:
     CPPUNIT_TEST(testContentXLSX);
     CPPUNIT_TEST(testContentLotus123);
     CPPUNIT_TEST(testContentDIF);
-    CPPUNIT_TEST(testSharedFormulaXLS);
-    CPPUNIT_TEST(testSharedFormulaXLSX);
+//  CPPUNIT_TEST(testSharedFormulaXLS);
+//  CPPUNIT_TEST(testSharedFormulaXLSX);
     CPPUNIT_TEST(testLegacyCellAnchoredRotatedShape);
 
 #if TEST_BUG_FILES
diff --git a/sc/qa/unit/ucalc_formula.cxx b/sc/qa/unit/ucalc_formula.cxx
index af5f96c..6407b72 100644
--- a/sc/qa/unit/ucalc_formula.cxx
+++ b/sc/qa/unit/ucalc_formula.cxx
@@ -122,17 +122,17 @@ void Test::testFormulaRefData()
     ScSingleRefData aRef;
     aRef.InitAddress(aAddr);
     CPPUNIT_ASSERT_MESSAGE("Wrong ref data state.", !aRef.IsRowRel() && !aRef.IsColRel() && !aRef.IsTabRel());
-    ASSERT_EQUAL_TYPE(SCCOL, 4, aRef.GetCol());
-    ASSERT_EQUAL_TYPE(SCROW, 5, aRef.GetRow());
-    ASSERT_EQUAL_TYPE(SCTAB, 3, aRef.GetTab());
+    ASSERT_EQUAL_TYPE(SCCOL, 4, aRef.Col());
+    ASSERT_EQUAL_TYPE(SCROW, 5, aRef.Row());
+    ASSERT_EQUAL_TYPE(SCTAB, 3, aRef.Tab());
 
     aRef.SetRowRel(true);
     aRef.SetColRel(true);
     aRef.SetTabRel(true);
     aRef.SetAddress(aAddr, aPos);
-    ASSERT_EQUAL_TYPE(SCCOL, 2, aRef.GetCol());
-    ASSERT_EQUAL_TYPE(SCROW, 3, aRef.GetRow());
-    ASSERT_EQUAL_TYPE(SCTAB, 1, aRef.GetTab());
+    ASSERT_EQUAL_TYPE(SCCOL, 2, aRef.Col());
+    ASSERT_EQUAL_TYPE(SCROW, 3, aRef.Row());
+    ASSERT_EQUAL_TYPE(SCTAB, 1, aRef.Tab());
 
     // Test extension of range reference.
 
diff --git a/sc/source/core/data/conditio.cxx b/sc/source/core/data/conditio.cxx
index 3792d5d..fe438af 100644
--- a/sc/source/core/data/conditio.cxx
+++ b/sc/source/core/data/conditio.cxx
@@ -1391,7 +1391,7 @@ void ScConditionEntry::SourceChanged( const ScAddress& rChanged )
                     SCsTAB nTab2;
 
                     if ( aProv.Ref1.IsColRel() )
-                        nCol2 = rChanged.Col() - aProv.Ref1.nRelCol;
+                        nCol2 = rChanged.Col() - aProv.Ref1.Col();
                     else
                     {
                         bHit &= ( rChanged.Col() >= aProv.Ref1.nCol );
@@ -1413,7 +1413,7 @@ void ScConditionEntry::SourceChanged( const ScAddress& rChanged )
                     }
 
                     if ( aProv.Ref2.IsColRel() )
-                        nCol1 = rChanged.Col() - aProv.Ref2.nRelCol;
+                        nCol1 = rChanged.Col() - aProv.Ref2.Col();
                     else
                     {
                         bHit &= ( rChanged.Col() <= aProv.Ref2.nCol );
diff --git a/sc/source/core/data/formulacell.cxx b/sc/source/core/data/formulacell.cxx
index 158dc85..694b81d 100644
--- a/sc/source/core/data/formulacell.cxx
+++ b/sc/source/core/data/formulacell.cxx
@@ -2718,17 +2718,17 @@ void ScFormulaCell::TransposeReference()
             ScSingleRefData& rRef2 = (bDouble ? t->GetDoubleRef().Ref2 : rRef1);
             if ( !bDouble || (rRef2.IsColRel() && rRef2.IsRowRel()) )
             {
-                sal_Int16 nTemp;
+                SCCOLROW nTemp;
 
-                nTemp = rRef1.nRelCol;
-                rRef1.nRelCol = static_cast<SCCOL>(rRef1.nRelRow);
-                rRef1.nRelRow = static_cast<SCROW>(nTemp);
+                nTemp = rRef1.Col();
+                rRef1.SetRelCol(rRef1.Row());
+                rRef1.SetRelRow(nTemp);
 
                 if ( bDouble )
                 {
-                    nTemp = rRef2.nRelCol;
-                    rRef2.nRelCol = static_cast<SCCOL>(rRef2.nRelRow);
-                    rRef2.nRelRow = static_cast<SCROW>(nTemp);
+                    nTemp = rRef2.Col();
+                    rRef2.SetRelCol(rRef2.Row());
+                    rRef2.SetRelRow(nTemp);
                 }
 
                 bFound = true;
diff --git a/sc/source/core/tool/chgtrack.cxx b/sc/source/core/tool/chgtrack.cxx
index 6b132ad..5265722 100644
--- a/sc/source/core/tool/chgtrack.cxx
+++ b/sc/source/core/tool/chgtrack.cxx
@@ -1977,19 +1977,16 @@ static void lcl_InvalidateReference( ScToken& rTok, const ScBigAddress& rPos )
     if ( rPos.Col() < 0 || MAXCOL < rPos.Col() )
     {
         rRef1.nCol = SCCOL_MAX;
-        rRef1.nRelCol = SCCOL_MAX;
         rRef1.SetColDeleted( true );
     }
     if ( rPos.Row() < 0 || MAXROW < rPos.Row() )
     {
         rRef1.nRow = SCROW_MAX;
-        rRef1.nRelRow = SCROW_MAX;
         rRef1.SetRowDeleted( true );
     }
     if ( rPos.Tab() < 0 || MAXTAB < rPos.Tab() )
     {
         rRef1.nTab = SCTAB_MAX;
-        rRef1.nRelTab = SCTAB_MAX;
         rRef1.SetTabDeleted( true );
     }
     if ( rTok.GetType() == formula::svDoubleRef )
@@ -1998,19 +1995,16 @@ static void lcl_InvalidateReference( ScToken& rTok, const ScBigAddress& rPos )
         if ( rPos.Col() < 0 || MAXCOL < rPos.Col() )
         {
             rRef2.nCol = SCCOL_MAX;
-            rRef2.nRelCol = SCCOL_MAX;
             rRef2.SetColDeleted( true );
         }
         if ( rPos.Row() < 0 || MAXROW < rPos.Row() )
         {
             rRef2.nRow = SCROW_MAX;
-            rRef2.nRelRow = SCROW_MAX;
             rRef2.SetRowDeleted( true );
         }
         if ( rPos.Tab() < 0 || MAXTAB < rPos.Tab() )
         {
             rRef2.nTab = SCTAB_MAX;
-            rRef2.nRelTab = SCTAB_MAX;
             rRef2.SetTabDeleted( true );
         }
     }
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index 4a87147..cfee0cd 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -1448,8 +1448,9 @@ r1c1_add_col( OUStringBuffer &rBuf, const ScSingleRefData& rRef, const ScAddress
     rBuf.append( sal_Unicode( 'C' ) );
     if( rRef.IsColRel() )
     {
-        if (rRef.nRelCol != 0)
-            rBuf.append("[").append( OUString::number( rRef.nRelCol ) ).append("]");
+        SCCOL nCol = rRef.Col();
+        if (nCol != 0)
+            rBuf.append("[").append(OUString::number(nCol)).append("]");
     }
     else
         rBuf.append( OUString::number( rAbsRef.Col() + 1 ) );
diff --git a/sc/source/core/tool/interpr4.cxx b/sc/source/core/tool/interpr4.cxx
index 84fae09..c3a47a4 100644
--- a/sc/source/core/tool/interpr4.cxx
+++ b/sc/source/core/tool/interpr4.cxx
@@ -1127,17 +1127,20 @@ void ScInterpreter::SingleRefToVars( const ScSingleRefData & rRef,
         SCCOL & rCol, SCROW & rRow, SCTAB & rTab )
 {
     if ( rRef.IsColRel() )
-        rCol = aPos.Col() + rRef.nRelCol;
+        rCol = aPos.Col() + rRef.Col();
     else
-        rCol = rRef.nCol;
+        rCol = rRef.Col();
+
     if ( rRef.IsRowRel() )
-        rRow = aPos.Row() + rRef.nRelRow;
+        rRow = aPos.Row() + rRef.Row();
     else
-        rRow = rRef.nRow;
+        rRow = rRef.Row();
+
     if ( rRef.IsTabRel() )
-        rTab = aPos.Tab() + rRef.nRelTab;
+        rTab = aPos.Tab() + rRef.Tab();
     else
-        rTab = rRef.nTab;
+        rTab = rRef.Tab();
+
     if( !ValidCol( rCol) || rRef.IsColDeleted() )
         SetError( errNoRef ), rCol = 0;
     if( !ValidRow( rRow) || rRef.IsRowDeleted() )
diff --git a/sc/source/core/tool/rangenam.cxx b/sc/source/core/tool/rangenam.cxx
index 84c3fb9..51d402f 100644
--- a/sc/source/core/tool/rangenam.cxx
+++ b/sc/source/core/tool/rangenam.cxx
@@ -213,22 +213,22 @@ void ScRangeData::GuessPosition()
     while ( ( t = static_cast<ScToken*>(pCode->GetNextReference()) ) != NULL )
     {
         ScSingleRefData& rRef1 = t->GetSingleRef();
-        if ( rRef1.IsColRel() && rRef1.nRelCol < nMinCol )
-            nMinCol = rRef1.nRelCol;
-        if ( rRef1.IsRowRel() && rRef1.nRelRow < nMinRow )
-            nMinRow = rRef1.nRelRow;
-        if ( rRef1.IsTabRel() && rRef1.nRelTab < nMinTab )
-            nMinTab = rRef1.nRelTab;
+        if ( rRef1.IsColRel() && rRef1.Col() < nMinCol )
+            nMinCol = rRef1.Col();
+        if ( rRef1.IsRowRel() && rRef1.Row() < nMinRow )
+            nMinRow = rRef1.Row();
+        if ( rRef1.IsTabRel() && rRef1.Tab() < nMinTab )
+            nMinTab = rRef1.Tab();
 
         if ( t->GetType() == svDoubleRef )
         {
             ScSingleRefData& rRef2 = t->GetDoubleRef().Ref2;
-            if ( rRef2.IsColRel() && rRef2.nRelCol < nMinCol )
-                nMinCol = rRef2.nRelCol;
-            if ( rRef2.IsRowRel() && rRef2.nRelRow < nMinRow )
-                nMinRow = rRef2.nRelRow;
-            if ( rRef2.IsTabRel() && rRef2.nRelTab < nMinTab )
-                nMinTab = rRef2.nRelTab;
+            if ( rRef2.IsColRel() && rRef2.Col() < nMinCol )
+                nMinCol = rRef2.Col();
+            if ( rRef2.IsRowRel() && rRef2.Row() < nMinRow )
+                nMinRow = rRef2.Row();
+            if ( rRef2.IsTabRel() && rRef2.Tab() < nMinTab )
+                nMinTab = rRef2.Tab();
         }
     }
 
diff --git a/sc/source/core/tool/refdata.cxx b/sc/source/core/tool/refdata.cxx
index a804838..a4735dc 100644
--- a/sc/source/core/tool/refdata.cxx
+++ b/sc/source/core/tool/refdata.cxx
@@ -40,6 +40,42 @@ void ScSingleRefData::InitAddressRel( const ScAddress& rAdr, const ScAddress& rP
     SetAddress(rAdr, rPos);
 }
 
+void ScSingleRefData::SetAbsCol( SCCOL nVal )
+{
+    Flags.bColRel = false;
+    nCol = nVal;
+}
+
+void ScSingleRefData::SetRelCol( SCCOL nVal )
+{
+    Flags.bColRel = true;
+    nRelCol = nVal;
+}
+
+void ScSingleRefData::SetAbsRow( SCROW nVal )
+{
+    Flags.bRowRel = false;
+    nRow = nVal;
+}
+
+void ScSingleRefData::SetRelRow( SCROW nVal )
+{
+    Flags.bRowRel = true;
+    nRelRow = nVal;
+}
+
+void ScSingleRefData::SetAbsTab( SCTAB nVal )
+{
+    Flags.bTabRel = false;
+    nTab = nVal;
+}
+
+void ScSingleRefData::SetRelTab( SCTAB nVal )
+{
+    Flags.bTabRel = true;
+    nRelTab = nVal;
+}
+
 void ScSingleRefData::SetColDeleted( bool bVal )
 {
     Flags.bColDeleted = (bVal ? true : false );
@@ -127,21 +163,21 @@ void ScSingleRefData::SetAddress( const ScAddress& rAddr, const ScAddress& rPos
         nTab = rAddr.Tab();
 }
 
-SCROW ScSingleRefData::GetRow() const
+SCROW ScSingleRefData::Row() const
 {
     if (Flags.bRowDeleted)
         return -1;
     return Flags.bRowRel ? nRelRow : nRow;
 }
 
-SCCOL ScSingleRefData::GetCol() const
+SCCOL ScSingleRefData::Col() const
 {
     if (Flags.bColDeleted)
         return -1;
     return Flags.bColRel ? nRelCol : nCol;
 }
 
-SCTAB ScSingleRefData::GetTab() const
+SCTAB ScSingleRefData::Tab() const
 {
     if (Flags.bTabDeleted)
         return -1;
diff --git a/sc/source/core/tool/token.cxx b/sc/source/core/tool/token.cxx
index b783525..53172ce 100644
--- a/sc/source/core/tool/token.cxx
+++ b/sc/source/core/tool/token.cxx
@@ -55,13 +55,6 @@ namespace
     {
         rRef.InitFlags();
 
-        rRef.nCol    = static_cast<SCsCOL>(rAPI.Column);
-        rRef.nRow    = static_cast<SCsROW>(rAPI.Row);
-        rRef.nTab    = static_cast<SCsTAB>(rAPI.Sheet);
-        rRef.nRelCol = static_cast<SCsCOL>(rAPI.RelativeColumn);
-        rRef.nRelRow = static_cast<SCsROW>(rAPI.RelativeRow);
-        rRef.nRelTab = static_cast<SCsTAB>(rAPI.RelativeSheet);
-
         rRef.SetColRel(     ( rAPI.Flags & sheet::ReferenceFlags::COLUMN_RELATIVE ) != 0 );
         rRef.SetRowRel(     ( rAPI.Flags & sheet::ReferenceFlags::ROW_RELATIVE    ) != 0 );
         rRef.SetTabRel(     ( rAPI.Flags & sheet::ReferenceFlags::SHEET_RELATIVE  ) != 0 );
@@ -70,27 +63,47 @@ namespace
         rRef.SetTabDeleted( ( rAPI.Flags & sheet::ReferenceFlags::SHEET_DELETED   ) != 0 );
         rRef.SetFlag3D(     ( rAPI.Flags & sheet::ReferenceFlags::SHEET_3D        ) != 0 );
         rRef.SetRelName(    ( rAPI.Flags & sheet::ReferenceFlags::RELATIVE_NAME   ) != 0 );
+
+        if (rRef.IsColRel())
+            rRef.SetRelCol(static_cast<SCCOL>(rAPI.RelativeColumn));
+        else
+            rRef.SetAbsCol(static_cast<SCCOL>(rAPI.Column));
+
+        if (rRef.IsRowRel())
+            rRef.SetRelRow(static_cast<SCROW>(rAPI.RelativeRow));
+        else
+            rRef.SetAbsRow(static_cast<SCROW>(rAPI.Row));
+
+        if (rRef.IsTabRel())
+            rRef.SetRelTab(static_cast<SCsTAB>(rAPI.RelativeSheet));
+        else
+            rRef.SetAbsTab(static_cast<SCsTAB>(rAPI.Sheet));
     }
 
     void lcl_ExternalRefToCalc( ScSingleRefData& rRef, const sheet::SingleReference& rAPI )
     {
         rRef.InitFlags();
 
-        rRef.nCol    = static_cast<SCsCOL>(rAPI.Column);
-        rRef.nRow    = static_cast<SCsROW>(rAPI.Row);
-        rRef.nTab    = 0;
-        rRef.nRelCol = static_cast<SCsCOL>(rAPI.RelativeColumn);
-        rRef.nRelRow = static_cast<SCsROW>(rAPI.RelativeRow);
-        rRef.nRelTab = 0;
-
         rRef.SetColRel(     ( rAPI.Flags & sheet::ReferenceFlags::COLUMN_RELATIVE ) != 0 );
         rRef.SetRowRel(     ( rAPI.Flags & sheet::ReferenceFlags::ROW_RELATIVE    ) != 0 );
-        rRef.SetTabRel(     false );    // sheet index must be absolute for external refs
         rRef.SetColDeleted( ( rAPI.Flags & sheet::ReferenceFlags::COLUMN_DELETED  ) != 0 );
         rRef.SetRowDeleted( ( rAPI.Flags & sheet::ReferenceFlags::ROW_DELETED     ) != 0 );
         rRef.SetTabDeleted( false );    // sheet must not be deleted for external refs
         rRef.SetFlag3D(     ( rAPI.Flags & sheet::ReferenceFlags::SHEET_3D        ) != 0 );
         rRef.SetRelName(    false );
+
+        if (rRef.IsColRel())
+            rRef.SetRelCol(static_cast<SCCOL>(rAPI.RelativeColumn));
+        else
+            rRef.SetAbsCol(static_cast<SCCOL>(rAPI.Column));
+
+        if (rRef.IsRowRel())
+            rRef.SetRelRow(static_cast<SCROW>(rAPI.RelativeRow));
+        else
+            rRef.SetAbsRow(static_cast<SCROW>(rAPI.Row));
+
+        // sheet index must be absolute for external refs
+        rRef.SetAbsTab(0);
     }
 //
 } // namespace
@@ -449,9 +462,6 @@ static ScSingleRefData lcl_ScToken_InitSingleRef()
 {
     ScSingleRefData aRef;
     aRef.InitAddress( ScAddress() );
-    aRef.nRelCol = 0;
-    aRef.nRelRow = 0;
-    aRef.nRelTab = 0;
     return aRef;
 }
 
diff --git a/sc/source/filter/excel/excform.cxx b/sc/source/filter/excel/excform.cxx
index 3f8f008..1080b4b 100644
--- a/sc/source/filter/excel/excform.cxx
+++ b/sc/source/filter/excel/excform.cxx
@@ -1594,57 +1594,48 @@ void ExcelToSc::ExcRelToScRel( sal_uInt16 nRow, sal_uInt8 nCol, ScSingleRefData
     {
         // C O L
         if( nRow & 0x4000 )
-        {//                                                         rel Col
-            rSRD.SetColRel( sal_True );
-            rSRD.nRelCol = static_cast<SCsCOL>(static_cast<sal_Int8>(nCol));
-        }
+            rSRD.SetRelCol(nCol);
         else
-        {//                                                         abs Col
-            rSRD.SetColRel( false );
-            rSRD.nCol = static_cast<SCCOL>(nCol);
-        }
+            rSRD.SetAbsCol(nCol);
 
         // R O W
         if( nRow & 0x8000 )
         {//                                                         rel Row
-            rSRD.SetRowRel( sal_True );
             if( nRow & 0x2000 ) // Bit 13 set?
-                //                                              -> Row negative
-                rSRD.nRelRow = static_cast<SCsROW>(static_cast<sal_Int16>(nRow | 0xC000));
+                // Row negative
+                rSRD.SetRelRow(nRow | 0xC000);
             else
-                //                                              -> Row positive
-                rSRD.nRelRow = static_cast<SCsROW>(nRow & nRowMask);
+                // Row positive
+                rSRD.SetRelRow(nRow & nRowMask);
         }
         else
         {//                                                         abs Row
-            rSRD.SetRowRel( false );
-            rSRD.nRow = static_cast<SCROW>(nRow & nRowMask);
+            rSRD.SetAbsRow(nRow & nRowMask);
         }
 
         // T A B
         // abs needed if rel in shared formula for ScCompiler UpdateNameReference
         if ( rSRD.IsTabRel() && !rSRD.IsFlag3D() )
-            rSRD.nTab = GetCurrScTab();
+            rSRD.SetAbsTab(GetCurrScTab());
     }
     else
     {
-        // C O L
-        rSRD.SetColRel( ( nRow & 0x4000 ) > 0 );
-        rSRD.nCol = static_cast<SCsCOL>(nCol);
+        bool bColRel = (nRow & 0x4000) > 0;
+        bool bRowRel = (nRow & 0x8000) > 0;
 
-        // R O W
-        rSRD.SetRowRel( ( nRow & 0x8000 ) > 0 );
-        rSRD.nRow = static_cast<SCsROW>(nRow & nRowMask);
+        if (bColRel)
+            rSRD.SetRelCol(nCol - aEingPos.Col());
+        else
+            rSRD.SetAbsCol(nCol);
 
-        if ( rSRD.IsColRel() )
-            rSRD.nRelCol = rSRD.nCol - aEingPos.Col();
-        if ( rSRD.IsRowRel() )
-            rSRD.nRelRow = rSRD.nRow - aEingPos.Row();
+        rSRD.SetAbsRow(nRow & nRowMask);
+        if (bRowRel)
+            rSRD.SetRelRow(rSRD.Row() - aEingPos.Row());
 
         // T A B
         // #i10184# abs needed if rel in shared formula for ScCompiler UpdateNameReference
         if ( rSRD.IsTabRel() && !rSRD.IsFlag3D() )
-            rSRD.nTab = GetCurrScTab() + rSRD.nRelTab;
+            rSRD.SetAbsTab(GetCurrScTab() + rSRD.Tab());
     }
 }
 
@@ -1756,9 +1747,9 @@ void ExcelToSc::SetComplCol( ScComplexRefData &rCRD )
 {
     ScSingleRefData &rSRD = rCRD.Ref2;
     if( rSRD.IsColRel() )
-        rSRD.nRelCol = MAXCOL - aEingPos.Col();
+        rSRD.SetRelCol(MAXCOL - aEingPos.Col());
     else
-        rSRD.nCol = MAXCOL;
+        rSRD.SetAbsCol(MAXCOL);
 }
 
 
@@ -1766,9 +1757,9 @@ void ExcelToSc::SetComplRow( ScComplexRefData &rCRD )
 {
     ScSingleRefData &rSRD = rCRD.Ref2;
     if( rSRD.IsRowRel() )
-        rSRD.nRelRow = MAXROW - aEingPos.Row();
+        rSRD.SetRelRow(MAXROW - aEingPos.Row());
     else
-        rSRD.nRow = MAXROW;
+        rSRD.SetAbsRow(MAXROW);
 }
 
 void ExcelToSc::ReadExtensionArray( unsigned int n, XclImpStream& aIn )
diff --git a/sc/source/filter/excel/excform8.cxx b/sc/source/filter/excel/excform8.cxx
index 3eec9ba..862584e 100644
--- a/sc/source/filter/excel/excform8.cxx
+++ b/sc/source/filter/excel/excform8.cxx
@@ -1425,60 +1425,37 @@ ConvErr ExcelToSc8::ConvertExternName( const ScTokenArray*& rpArray, XclImpStrea
 
 void ExcelToSc8::ExcRelToScRel8( sal_uInt16 nRow, sal_uInt16 nC, ScSingleRefData &rSRD, const sal_Bool bName )
 {
-    const sal_Bool      bColRel = ( nC & 0x4000 ) != 0;
-    const sal_Bool      bRowRel = ( nC & 0x8000 ) != 0;
-    const sal_uInt8     nCol = static_cast<sal_uInt8>(nC);
-
-    rSRD.SetColRel( bColRel );
-    rSRD.SetRowRel( bRowRel );
+    const bool bColRel = ( nC & 0x4000 ) != 0;
+    const bool bRowRel = ( nC & 0x8000 ) != 0;
+    const sal_uInt8 nCol = static_cast<sal_uInt8>(nC);
 
     if( bName )
     {
         // C O L
         if( bColRel )
-            //                                                          rel Col
-            rSRD.nRelCol = static_cast<SCsCOL>(static_cast<sal_Int8>(nC));
+            rSRD.SetRelCol(nC);
         else
-            //                                                          abs Col
-            rSRD.nCol = static_cast<SCCOL>(nCol);
+            rSRD.SetAbsCol(nCol);
 
         // R O W
         if( bRowRel )
-            //                                                          rel Row
-            rSRD.nRelRow = static_cast<SCsROW>(static_cast<sal_Int16>(nRow));
+            rSRD.SetRelRow(nRow);
         else
-            //                                                          abs Row
-            rSRD.nRow = std::min( static_cast<SCROW>(nRow), MAXROW);
-
-        // T A B
-        // abs needed if rel in shared formula for ScCompiler UpdateNameReference
-        if ( rSRD.IsTabRel() && !rSRD.IsFlag3D() )
-            rSRD.nTab = GetCurrScTab();
+            rSRD.SetAbsRow(std::min( static_cast<SCROW>(nRow), MAXROW));
     }
     else
     {
         // C O L
         if ( bColRel )
-        {
-            rSRD.nRelCol = static_cast<SCsCOL>(nCol) - aEingPos.Col();
-            rSRD.nCol = rSRD.nRelCol;
-        }
+            rSRD.SetRelCol(static_cast<SCCOL>(nCol) - aEingPos.Col());
         else
-            rSRD.nCol = static_cast<SCCOL>(nCol);
+            rSRD.SetAbsCol(nCol);
 
         // R O W
         if ( bRowRel )
-        {
-            rSRD.nRelRow = static_cast<SCsROW>(nRow) - aEingPos.Row();
-            rSRD.nRow = rSRD.nRelRow;
-        }
+            rSRD.SetRelRow(static_cast<SCROW>(nRow) - aEingPos.Row());
         else
-            rSRD.nRow = static_cast<SCROW>(nRow);
-
-        // T A B
-        // #i10184# abs needed if rel in shared formula for ScCompiler UpdateNameReference
-        if ( rSRD.IsTabRel() && !rSRD.IsFlag3D() )
-            rSRD.nTab = GetCurrScTab() + rSRD.nRelTab;
+            rSRD.SetAbsRow(nRow);
     }
 }
 
diff --git a/sc/source/filter/lotus/lotform.cxx b/sc/source/filter/lotus/lotform.cxx
index 33e8456..75460cd 100644
--- a/sc/source/filter/lotus/lotform.cxx
+++ b/sc/source/filter/lotus/lotform.cxx
@@ -215,24 +215,22 @@ void LotusToSc::LotusRelToScRel( sal_uInt16 nCol, sal_uInt16 nRow, ScSingleRefDa
     // Col-Bemachung
     if( nCol & 0x8000 )
     {
-        rSRD.SetColRel( sal_True );
         if( nCol & 0x0080 )
             nCol |= 0xFF00;
         else
             nCol &= 0x00FF;
-        // #i36252# first cast unsigned 16-bit to signed 16-bit, and then to SCsCOL
-        rSRD.nRelCol = static_cast< SCsCOL >( static_cast< sal_Int16 >( nCol ) );
+        // #i36252# first cast unsigned 16-bit to signed 16-bit, and then to SCCOL
+        rSRD.SetRelCol(static_cast<SCCOL>(static_cast<sal_Int16>(nCol)));
     }
     else
     {
-        rSRD.SetColRel( false );
-        rSRD.nCol = static_cast< SCsCOL >( nCol & 0x00FF );
+        rSRD.SetAbsCol(static_cast<SCCOL>(nCol & 0x00FF));
     }
 
     // Row-Bemachung
     if( nRow & 0x8000 )
     {
-        rSRD.SetRowRel( sal_True );
+        rSRD.SetRowRel(true);
         // vorzeichenrichtige Erweiterung
         switch( eTyp )
         {
@@ -259,7 +257,7 @@ void LotusToSc::LotusRelToScRel( sal_uInt16 nCol, sal_uInt16 nRow, ScSingleRefDa
     }
     else
     {
-        rSRD.SetRowRel( false );
+        rSRD.SetRowRel(false);
         switch( eTyp )
         {
             // 5432 1098 7654 3210
@@ -279,10 +277,10 @@ void LotusToSc::LotusRelToScRel( sal_uInt16 nCol, sal_uInt16 nRow, ScSingleRefDa
     }
 
     if( rSRD.IsRowRel() )
-        // #i36252# first cast unsigned 16-bit to signed 16-bit, and then to SCsROW
-        rSRD.nRelRow = static_cast< SCsROW >( static_cast< sal_Int16 >( nRow ) );
+        // #i36252# first cast unsigned 16-bit to signed 16-bit, and then to SCROW
+        rSRD.SetRelRow(static_cast<SCROW>(static_cast<sal_Int16>(nRow)));
     else
-        rSRD.nRow = static_cast< SCsROW >( nRow );
+        rSRD.SetAbsRow(static_cast<SCROW>(nRow));
 }
 
 
diff --git a/sc/source/filter/qpro/qproform.cxx b/sc/source/filter/qpro/qproform.cxx
index 89c6c03..024a2a7 100644
--- a/sc/source/filter/qpro/qproform.cxx
+++ b/sc/source/filter/qpro/qproform.cxx
@@ -35,40 +35,34 @@ void QProToSc::ReadSRD( ScSingleRefData& rSRD, sal_Int8 nPage, sal_Int8 nCol, sa
     rSRD.InitAddress( ScAddress( nCol, (~nTmp + 1), 0 ) );
     if( nRelBit & 0x4000 )
     {
-        rSRD.nRelCol = nCol;
-        rSRD.SetColRel( sal_True );
+        rSRD.SetRelCol(nCol);
     }
     else
     {
-        rSRD.nCol = nCol;
-        rSRD.SetColRel( false );
+        rSRD.SetAbsCol(nCol);
     }
+
     if( nRelBit & 0x2000 )
     {
-        rSRD.nRelRow = (~nTmp + 1);
-        rSRD.nRelRow = (sal_Int16)(nTmp << 3);
-        rSRD.nRelRow /= 8;
-        rSRD.SetRowRel( sal_True );
+        SCROW nRelRow = (~nTmp + 1);
+        nRelRow = (sal_Int16)(nTmp << 3); // This looks weird... Mistake?
+        nRelRow /= 8;
+        rSRD.SetRelRow(nRelRow);
     }
     else
     {
-        rSRD.nRow = nTmp;
-        rSRD.SetRowRel( false );
+        rSRD.SetAbsRow(nTmp);
     }
     if( nRelBit & 0x8000 )
     {
-        rSRD.nRelTab = nPage;
-        rSRD.SetTabRel( sal_True );
-        // absolute tab needed in caller for comparison in case of DoubleRef
-        rSRD.nTab = aEingPos.Tab() + nPage;
+        rSRD.SetRelTab(nPage);
     }
     else
     {
-        rSRD.nTab = nPage;
-        rSRD.SetTabRel( false );
+        rSRD.SetAbsTab(nPage);
     }
-    if (rSRD.nTab != aEingPos.Tab())
-        rSRD.SetFlag3D( sal_True);
+    if (rSRD.toAbs(aEingPos).Tab() != aEingPos.Tab())
+        rSRD.SetFlag3D(true);
 }
 
 QProToSc::QProToSc( SvStream& rStream, const ScAddress& rRefPos ) :
commit 69b1caa11940141cf81daf9e63f22e230d777e32
Author: Haidong Lian <haidong at multicorewareinc.com>
Date:   Wed Jul 24 15:16:55 2013 -0400

    Add support for double in OpenCL kernel.
    
    * modified coding style.
    * merged arithmetic operators together.
    * added support for double in OpenCL kernel.
    * added an environment variable named SC_FLOAT which, when set to 1, forces
      the OpenCL kernel to use float. If it is not set, the GPU is detected: if
      it supports double, the kernel uses double; otherwise it uses float.
    
    Conflicts:
    	sc/source/core/opencl/openclwrapper.cxx
    	sc/source/core/opencl/openclwrapper.hxx
    
    Change-Id: I7cdec458d72837d3b22ba50c6d28f78797ee0d3b

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 8de7713..cd0c694 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -104,7 +104,7 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
     size_t rowSize = xGroup->mnLength;
     fprintf(stderr,"rowSize at begin is ...%ld.\n",(long)rowSize);
     // The row quantity can be gotten from p2->GetArrayLength()
-    int nCount1 = 0, nCount2 = 0, nCount3 = 0;
+    uint nCount1 = 0, nCount2 = 0, nCount3 = 0;
     int nOclOp = 0;
     double *rResult = NULL; // Point to the output data from GPU
     rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C)
@@ -115,18 +115,41 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
     }
     memset(rResult,0,rowSize);
     float * fpOclSrcData = NULL; // Point to the input data from CPU
+    double * dpOclSrcData = NULL;
     uint * npOclStartPos = NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100))
     uint * npOclEndPos   = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100))
     float * fpLeftData   = NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData)
     float * fpRightData  = NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData)
                                  // The rightData can't be zero for "/"
+    double * dpLeftData = NULL;
+    double * dpRightData = NULL;
+
+    float * fpSaveData=NULL;            //It is a temp pointer point the preparing memory;
+    float * fpSumProMergeLfData = NULL; //It merge the more col to one col is the left operator
+    float * fpSumProMergeRtData = NULL; //It merge the more col to one col is the right operator
+    double * dpSaveData=NULL;
+    double * dpSumProMergeLfData = NULL;
+    double * dpSumProMergeRtData = NULL;
+    uint * npSumSize=NULL;      //It is a array to save the matix sizt(col *row)
+    int nSumproductSize=0;      //It is the merge array size
+    bool aIsAlloc=false;        //It is a flag to judge the fpSumProMergeLfData existed
+    unsigned int nCountMatix=0; //It is a count to save the calculate times
     static OclCalc ocl_calc;
+    bool isSumProduct=false;
     if(ocl_calc.GetOpenclState())
     {
         // Don't know how large the size will be applied previously, so create them as the rowSize or 65536
         // Don't know which formulae will be used previously, so create buffers for different formulae used probably
-        ocl_calc.CreateBuffer(fpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
-        ocl_calc.CreateBuffer(fpLeftData,fpRightData,rowSize);
+        if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
+        {
+            ocl_calc.CreateBuffer64Bits(dpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
+            ocl_calc.CreateBuffer64Bits(dpLeftData,dpRightData,rowSize);
+        }
+        else
+        {
+            ocl_calc.CreateBuffer32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
+            ocl_calc.CreateBuffer32Bits(fpLeftData,fpRightData,rowSize);
+        }
         //printf("pptrr is %d,%d,%d\n",fpOclSrcData,npOclStartPos,npOclEndPos);
     }
 ///////////////////////////////////////////////////////////////////////////////////////////
@@ -159,12 +182,55 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
                     if (!p2->IsEndFixed())
                         nRowEnd += i;
                     size_t nRowSize = nRowEnd - nRowStart + 1;
+                    //store the matrix's row size and column size; they are used to calculate the matrix's size
+                    ocl_calc.nFormulaRowSize = nRowSize;
+                    ocl_calc.nFormulaColSize = nColSize;
                     ScMatrixRef pMat(new ScMatrix(nColSize, nRowSize, 0.0));
                     if(ocl_calc.GetOpenclState())
                     {
                         npOclStartPos[i] = nRowStart; // record the start position
                         npOclEndPos[i]   = nRowEnd;   // record the end position
                     }
+                    int nTempOpcode;
+                    const formula::FormulaToken* pTemp = p;
+                    pTemp=aCode2.Next();
+                    nTempOpcode=pTemp->GetOpCode();
+                    while(1)
+                    {
+                        nTempOpcode=pTemp->GetOpCode();
+                        if(nTempOpcode!=ocOpen && nTempOpcode!=ocPush)
+                            break;
+                         pTemp=aCode2.Next();
+                    }
+                    if((!aIsAlloc) && (ocl_calc.GetOpenclState())&& (nTempOpcode == ocSumProduct))
+                    {
+                        //nColSize * rowSize is the data size , but except the the head of data will use less the nRowSize
+                        //the other all use nRowSize times . and it must aligen so add nRowSize-1.
+                        nSumproductSize = nRowSize+nColSize * rowSize*nRowSize-1;
+                        if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
+                            ocl_calc.CreateBuffer64Bits(dpSumProMergeLfData,dpSumProMergeRtData,npSumSize,nSumproductSize,rowSize);
+                        else
+                            ocl_calc.CreateBuffer32Bits(fpSumProMergeLfData,fpSumProMergeRtData,npSumSize,nSumproductSize,rowSize);
+                        aIsAlloc = true;
+                        isSumProduct=true;
+                    }
+                    if(isSumProduct)
+                    {
+                        if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
+                        {
+                            if(nCountMatix%2==0)
+                                dpSaveData = dpSumProMergeLfData;
+                            else
+                                dpSaveData = dpSumProMergeRtData;
+                        }
+                        else
+                        {
+                            if(nCountMatix%2==0)
+                                fpSaveData = fpSumProMergeLfData;
+                            else
+                                fpSaveData = fpSumProMergeRtData;
+                        }
+                    }
                     for (size_t nCol = 0; nCol < nColSize; ++nCol)
                     {
                         const double* pArray = rArrays[nCol];
@@ -177,9 +243,21 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
                         {
                             for( size_t u=nRowStart; u<=nRowEnd; u++ )
                             {
-                                // Many video cards can't support double type in kernel, so need transfer the double to float
-                                fpOclSrcData[u] = (float)pArray[u];
-                                //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpOclSrcData[u]);
+                                if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
+                                {
+                                    dpOclSrcData[u] = pArray[u];
+                                    //fprintf(stderr,"dpOclSrcData[%d] is %f.\n",u,dpOclSrcData[u]);
+                                    if(isSumProduct)
+                                        dpSaveData[u+nRowSize*nCol + nRowStart* nColSize * nRowSize-nRowStart] = pArray[u];
+                                }
+                                else
+                                {
+                                    // Many video cards can't support double type in kernel, so need transfer the double to float
+                                    fpOclSrcData[u] = (float)pArray[u];
+                                    //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpOclSrcData[u]);
+                                    if(isSumProduct)
+                                        fpSaveData[u+nRowSize*nCol + nRowStart* nColSize * nRowSize-nRowStart] = (float)pArray[u];
+                                }
                             }
                         }
 
@@ -195,6 +273,11 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
 
                     ScMatrixToken aTok(pMat);
                     aCode2.AddToken(aTok);
+                    if(isSumProduct)
+                    {
+                        npSumSize[nCountMatix/2] =nRowSize*nColSize;
+                        nCountMatix++;
+                    }
                 }
                 break;
                 default:
@@ -214,21 +297,32 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
                 OpCode eOp = pCur->GetOpCode();
                 if(eOp==0)
                 {
-                     if(nCount3%2==0)
-                         fpLeftData[nCount1++] = (float)pCur->GetDouble();
-                     else
-                         fpRightData[nCount2++] = (float)pCur->GetDouble();
-                     nCount3++;
+                    if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
+                    {
+                        if(nCount3%2==0)
+                            dpLeftData[nCount1++] = pCur->GetDouble();
+                        else
+                            dpRightData[nCount2++] = pCur->GetDouble();
+                        nCount3++;
+                    }
+                    else
+                    {
+                        if(nCount3%2==0)
+                            fpLeftData[nCount1++] = (float)pCur->GetDouble();
+                        else
+                            fpRightData[nCount2++] = (float)pCur->GetDouble();
+                        nCount3++;
+                    }
                 }
-                else if( eOp!=ocOpen && eOp!=ocClose )
+                else if( eOp!=ocOpen && eOp!=ocClose &&eOp != ocSep)
                     nOclOp = eOp;
 
 //              if(count1>0){//dbg
-//                  fprintf(stderr,"leftData is %f.\n",leftData[count1-1]);
+//                  fprintf(stderr,"leftData is %f.\n",fpLeftData[count1-1]);
 //                  count1--;
 //              }
 //              if(count2>0){//dbg
-//                  fprintf(stderr,"rightData is %f.\n",rightData[count2-1]);
+//                  fprintf(stderr,"rightData is %f.\n",fpRightData[count2-1]);
 //                  count2--;
 //              }
             }
@@ -249,52 +343,99 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
     // For GPU calculation
     if(getenv("SC_GPU")&&ocl_calc.GetOpenclState())
     {
-            fprintf(stderr,"ggGPU flow...\n\n");
-            printf(" oclOp is... %d\n",nOclOp);
-            osl_getSystemTime(&aTimeBefore); //timer
+        fprintf(stderr,"ggGPU flow...\n\n");
+        printf(" oclOp is... %d\n",nOclOp);
+        osl_getSystemTime(&aTimeBefore); //timer
+        if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
+        {
+            fprintf(stderr,"ggGPU double precision flow...\n\n");
+            //double precision
             switch(nOclOp)
             {
                 case ocAdd:
-                    ocl_calc.OclHostSignedAdd32Bits(fpLeftData,fpRightData,rResult,nCount1);
+                    ocl_calc.OclHostArithmeticOperator64Bits("oclSignedAdd",dpLeftData,dpRightData,rResult,nCount1);
                     break;
                 case ocSub:
-                    ocl_calc.OclHostSignedSub32Bits(fpLeftData,fpRightData,rResult,nCount1);
+                    ocl_calc.OclHostArithmeticOperator64Bits("oclSignedSub",dpLeftData,dpRightData,rResult,nCount1);
                     break;
                 case ocMul:
-                    ocl_calc.OclHostSignedMul32Bits(fpLeftData,fpRightData,rResult,nCount1);
+                    ocl_calc.OclHostArithmeticOperator64Bits("oclSignedMul",dpLeftData,dpRightData,rResult,nCount1);
                     break;
                 case ocDiv:
-                    ocl_calc.OclHostSignedDiv32Bits(fpLeftData,fpRightData,rResult,nCount1);
+                    ocl_calc.OclHostArithmeticOperator64Bits("oclSignedDiv",dpLeftData,dpRightData,rResult,nCount1);
                     break;
                 case ocMax:
-                    ocl_calc.OclHostFormulaMax32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+                    ocl_calc.OclHostFormulaStatistics64Bits("oclFormulaMax",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
                     break;
                 case ocMin:
-                    ocl_calc.OclHostFormulaMin32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+                    ocl_calc.OclHostFormulaStatistics64Bits("oclFormulaMin",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
                     break;
                 case ocAverage:
-                    ocl_calc.OclHostFormulaAverage32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+                    ocl_calc.OclHostFormulaStatistics64Bits("oclFormulaAverage",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
                     break;
                 case ocSum:
-                    //ocl_calc.OclHostFormulaSum(srcData,rangeStart,rangeEnd,rResult,rowSize);
+                    ocl_calc.OclHostFormulaStatistics64Bits("oclFormulaSum",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
                     break;
                 case ocCount:
-                    //ocl_calc.OclHostFormulaCount(rangeStart,rangeEnd,rResult,rowSize);
+                    ocl_calc.OclHostFormulaCount64Bits(npOclStartPos,npOclEndPos,rResult,rowSize);
                     break;
                 case ocSumProduct:
-                    //ocl_calc.OclHostFormulaSumProduct(srcData,rangeStart,rangeEnd,rResult,rowSize);
+                    ocl_calc.OclHostFormulaSumProduct64Bits(dpSumProMergeLfData,dpSumProMergeRtData,npSumSize,rResult,rowSize);
                     break;
                 default:
                     fprintf(stderr,"No OpenCL function for this calculation.\n");
                     break;
-            }
-            /////////////////////////////////////////////////////
-            osl_getSystemTime(&aTimeAfter);
-            double diff = getTimeDiff(aTimeAfter, aTimeBefore);
-            //if (diff >= 1.0)
+              }
+        }
+        else
+        {
+            fprintf(stderr,"ggGPU float precision flow...\n\n");
+            //float precision
+            switch(nOclOp)
             {
-                fprintf(stderr,"OpenCL,diff...%f.\n",diff);
-            }
+                case ocAdd:
+                    ocl_calc.OclHostArithmeticOperator32Bits("oclSignedAdd",fpLeftData,fpRightData,rResult,nCount1);
+                    break;
+                case ocSub:
+                    ocl_calc.OclHostArithmeticOperator32Bits("oclSignedSub",fpLeftData,fpRightData,rResult,nCount1);
+                    break;
+                case ocMul:
+                    ocl_calc.OclHostArithmeticOperator32Bits("oclSignedMul",fpLeftData,fpRightData,rResult,nCount1);
+                    break;
+                case ocDiv:
+                    ocl_calc.OclHostArithmeticOperator32Bits("oclSignedDiv",fpLeftData,fpRightData,rResult,nCount1);
+                    break;
+                case ocMax:
+                    ocl_calc.OclHostFormulaStatistics32Bits("oclFormulaMax",fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+                    break;
+                case ocMin:
+                    ocl_calc.OclHostFormulaStatistics32Bits("oclFormulaMin",fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+                    break;
+                case ocAverage:
+                    ocl_calc.OclHostFormulaStatistics32Bits("oclFormulaAverage",fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+                    break;
+                case ocSum:
+                    ocl_calc.OclHostFormulaStatistics32Bits("oclFormulaSum",fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+                    break;
+                case ocCount:
+                    ocl_calc.OclHostFormulaCount32Bits(npOclStartPos,npOclEndPos,rResult,rowSize);
+                    break;
+                case ocSumProduct:
+                    ocl_calc.OclHostFormulaSumProduct32Bits(fpSumProMergeLfData,fpSumProMergeRtData,npSumSize,rResult,rowSize);
+                    break;
+                default:
+                    fprintf(stderr,"No OpenCL function for this calculation.\n");
+                    break;
+              }
+        }
+
+        /////////////////////////////////////////////////////
+        osl_getSystemTime(&aTimeAfter);
+        double diff = getTimeDiff(aTimeAfter, aTimeBefore);
+        //if (diff >= 1.0)
+        {
+            fprintf(stderr,"OpenCL,diff...%f.\n",diff);
+        }
 /////////////////////////////////////////////////////
 
 //rResult[i];
@@ -302,17 +443,12 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
 //               fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]);
 //           }
 
-            // Insert the double data, in rResult[i] back into the document
-            rDoc.SetFormulaResults(rTopPos, rResult, xGroup->mnLength);
-        }
-
-        if(rResult)
-            free(rResult);
+        // Insert the double data, in rResult[i] back into the document
+        rDoc.SetFormulaResults(rTopPos, rResult, xGroup->mnLength);
+    }
 
-        if(getenv("SC_GPUSAMPLE")){
-            //fprintf(stderr,"FormulaGroupInterpreter::interpret(),iniflag...%d\n",ocl_calc.GetOpenclState());
-            //ocl_calc.OclTest();//opencl test sample for debug
-        }
+    if(rResult)
+        free(rResult);
 
     return true;
 }
diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx
index c231dbd..7c9bcaf 100644
--- a/sc/source/core/opencl/oclkernels.hxx
+++ b/sc/source/core/opencl/oclkernels.hxx
@@ -12,175 +12,119 @@
 
 #ifndef USE_EXTERNAL_KERNEL
 #define KERNEL( ... )# __VA_ARGS__
-
+// Double precision is the default for spreadsheets
+// cl_khr_fp64: Khronos extension
+// cl_amd_fp64: AMD extension
+// fp_t is defined outside this file, via a build option
 /////////////////////////////////////////////
 const char *kernel_src = KERNEL(
-__kernel void hello(__global uint *buffer)
-
-{
-    size_t idx = get_global_id(0);
-    buffer[idx]=idx;
-}
-
-__kernel void oclformula(__global float *data,
-                       const uint type)
-{
-    const unsigned int i = get_global_id(0);
-
-    switch (type)
-    {
-        case 0:          //MAX
-        {
-            //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
-            if(data[2*i]>data[2*i+1])
-                data[i] = data[2*i];
-            else
-                data[i] = data[2*i+1];
-            break;
-        }
-        case 1:          //MIN
-        {
-            //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
-            if(data[2*i]<data[2*i+1])
-                data[i] = data[2*i];
-            else
-                data[i] = data[2*i+1];
-            break;
-        }
-        case 2:          //SUM
-        case 3:          //AVG
-        {
-            //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
-            data[i] = data[2*i] + data[2*i+1];
-            break;
-        }
-        default:
-            break;
+\n#ifdef KHR_DP_EXTENSION\n
+\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n
+\n#elif AMD_DP_EXTENSION\n
+\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
+\n#else\n
+\n#endif\n
 
-    }
-}
-
-
-__kernel void oclSignedAdd(__global float *ltData,__global float *rtData,__global float *otData)
+__kernel void oclSignedAdd(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
 {
     const unsigned int id = get_global_id(0);
     otData[id] = ltData[id] + rtData[id];
 }
 
 
-__kernel void oclSignedSub(__global float *ltData,__global float *rtData,__global float *otData)
+__kernel void oclSignedSub(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
 {
     const unsigned int id = get_global_id(0);
     otData[id] = ltData[id] - rtData[id];
-
 }
 
-__kernel void oclSignedMul(__global float *ltData,__global float *rtData,__global float *otData)
+__kernel void oclSignedMul(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
 {
     int id = get_global_id(0);
     otData[id] =ltData[id] * rtData[id];
 }
 
-__kernel void oclSignedDiv(__global float *ltData,__global float *rtData,__global float *otData)
+__kernel void oclSignedDiv(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
 {
     const unsigned int id = get_global_id(0);
     otData[id] = ltData[id] / rtData[id];
 }
 
-__kernel void oclFormulaMin(__global float *input,__global int *start,__global int *end,__global float *output)
+__kernel void oclFormulaMin(__global fp_t *input,__global int *start,__global int *end,__global fp_t *output)
 {
     const unsigned int id = get_global_id(0);
-    int i=0;
     unsigned int startFlag = start[id];
     unsigned int endFlag = end[id];
-    float min = input[startFlag];
-    for(i=startFlag;i<=endFlag;i++)
+    fp_t min = input[startFlag];
+    for(int i=startFlag;i<=endFlag;i++)
     {
         if(input[i]<min)
             min = input[i];
     }
     output[id] = min;
-
 }
 
-__kernel void oclFormulaMax(__global float *input,__global int *start,__global int *end,__global float *output)
+__kernel void oclFormulaMax(__global fp_t *input,__global int *start,__global int *end,__global fp_t *output)
 {
     const unsigned int id = get_global_id(0);
-    int i=0;
     unsigned int startFlag = start[id];
     unsigned int endFlag = end[id];
-    float max = input[startFlag];
-    for(i=startFlag;i<=endFlag;i++)
+    fp_t max = input[startFlag];
+    for(int i=startFlag;i<=endFlag;i++)
     {
         if(input[i]>max)
             max = input[i];
     }
     output[id] = max;
-
 }
 //Sum
-__kernel void oclFormulaSum(__global float *input,__global int *start,__global int *end,__global float *output)
+__kernel void oclFormulaSum(__global fp_t *input,__global int *start,__global int *end,__global fp_t *output)
 {
     const unsigned int nId = get_global_id(0);
-    float fSum = 0.0f;
+    fp_t fSum = 0.0;
     for(int i = start[nId]; i<=end[nId]; i++)
         fSum += input[i];
     output[nId] = fSum ;
 }
 //Count
-__kernel void oclFormulaCount(__global int *start,__global int *end,__global float *output)
+__kernel void oclFormulaCount(__global int *start,__global int *end,__global fp_t *output)
 {
     const unsigned int nId = get_global_id(0);
     output[nId] = end[nId] - start[nId] + 1;
 }
 
-__kernel void oclFormulaAverage(__global float *input,__global int *start,__global int *end,__global float *output)
+__kernel void oclFormulaAverage(__global fp_t *input,__global int *start,__global int *end,__global fp_t *output)
 {
     const unsigned int id = get_global_id(0);
-    int i=0;
-    float sum=0;
-    for(i = start[id];i<=end[id];i++)
+    fp_t sum=0.0;
+    for(int i = start[id];i<=end[id];i++)
         sum += input[i];
     output[id] = sum / (end[id]-start[id]+1);
 }
 
 //Sumproduct
-__kernel void oclFormulaSumproduct(__global float *firstCol,__global float *secondCol,__global int *start,__global int *end,__global float *output)
+__kernel void oclFormulaSumproduct(__global fp_t *firstCol,__global int* npSumSize,__global fp_t *output,uint nMatixSize)
 {
-    const int nId = get_global_id(0);
-    int nCount     = start[nId] - end[nId] + 1;
-    int nStartA  = start[nId*2];
-    int nStartB  = start[nId*2+1];
-    for(int i = 0; i<nCount; i++)
-        output[nId] += firstCol[nStartA+i]*secondCol[nStartB+i];
+    const unsigned int id = get_global_id(0);
+    unsigned int nSumSize = npSumSize[id];
+    fp_t fSum = 0.0;
+    for(int i=0;i<nSumSize;i++)
+        fSum += firstCol[i + nMatixSize * id];
+    output[id] = fSum;
 }
 
-__kernel void oclFormulaMinverse(__global float *data,
-                       const uint type)
+__kernel void oclFormulaMinverse(__global fp_t *data, const uint type)
 {
 
 }
 
-// Double precision is a requirement of spreadsheets
-// cl_khr_fp64: Khronos extension
-// cl_amd_fp64: AMD extension
-\n#if 0 \n
-\n#if defined(cl_khr_fp64) \n
-\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n
-\n#elif defined(cl_amd_fp64) \n
-\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable \n
-\n#endif \n
-\ntypedef double fp_t; \n
-\n#else \n
-\ntypedef float fp_t; \n
-\n#endif \n
 
 __kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output)
 {
     const unsigned int id = get_global_id(0);
 
     // Average
-    fp_t fSum = 0.0f;
+    fp_t fSum = 0.0;
     for(int i = start; i < end; i++)
         fSum += values[i];
     fp_t fVal = fSum/(end-start);
@@ -194,7 +138,7 @@ __kernel void oclMaxDelta(__global fp_t *values, __global fp_t *subtract, uint s
     const unsigned int id = get_global_id(0);
 
     // Max
-    float fMaxVal = values[start];
+    fp_t fMaxVal = values[start];
     for(int i=start+1;i < end;i++)
     {
         if(values[i]>fMaxVal)
@@ -210,7 +154,7 @@ __kernel void oclMinDelta(__global fp_t *values, __global fp_t *subtract, uint s
     const unsigned int id = get_global_id(0);
 
     // Min
-    float fMinVal = values[start];
+    fp_t fMinVal = values[start];
     for(int i=start+1;i < end;i++)
     {
         if(values[i]<fMinVal)
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 03f4228..3d165a8 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -28,18 +28,30 @@ int OpenclDevice::isInited =0;
 #define OPENCL_DLL_NAME "opencllo.dll"
 #define OCLERR -1
 #define OCLSUCCESS 1
+
+#define TRUE 1
+#define FALSE 0
+
+#define OCL_INFO(str) \
+    printf("[OCL_INFO] %s\n",str);
+#define OCL_ERROR(str) \
+    fprintf(stderr,"[OCL_ERROR] %s\n",str);
+#define OCL_CHECK(value1,value2,str) \
+    if(value1!=value2) \
+        fprintf(stderr,"[OCL_ERROR] %s\n",str);
+
 HINSTANCE HOpenclDll = NULL;
-    void *OpenclDll = NULL;
+void * OpenclDll = NULL;
 
 int OpenclDevice::LoadOpencl()
 {
     //fprintf(stderr, " LoadOpenclDllxx... \n");
-    OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
-    OpenclDll = LoadLibrary(OPENCL_DLL_NAME);
-    if (!static_cast<HINSTANCE>(OpenclDll))
+    OpenclDll = static_cast<HINSTANCE>( HOpenclDll );
+    OpenclDll = LoadLibrary( OPENCL_DLL_NAME );
+    if ( !static_cast<HINSTANCE>( OpenclDll ) )
     {
         fprintf(stderr, " Load opencllo.dll failed! \n");
-        FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+        FreeLibrary( static_cast<HINSTANCE>( OpenclDll ) );
         return OCLERR;
     }
     fprintf(stderr, " Load opencllo.dll successfully!\n");
@@ -49,26 +61,27 @@ int OpenclDevice::LoadOpencl()
 void OpenclDevice::FreeOpenclDll()
 {
     fprintf(stderr, " Free opencllo.dll ... \n");
-    if(!static_cast<HINSTANCE>(OpenclDll))
-        FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+    if ( !static_cast<HINSTANCE>( OpenclDll ) )
+        FreeLibrary( static_cast<HINSTANCE>( OpenclDll ) );
 }
 #endif
 
 int OpenclDevice::InitEnv()
 {
 #ifdef SAL_WIN32
-    while(1)
+    while( 1 )
     {
-        if(1==LoadOpencl())
-        break;
+        if( 1 == LoadOpencl() )
+            break;
     }
 #endif
-    InitOpenclRunEnv(0,NULL);
+    InitOpenclRunEnv( 0 );
     return 1;
 }
 
-int OpenclDevice::ReleaseOpenclRunEnv() {
-    ReleaseOpenclEnv(&gpuEnv);
+int OpenclDevice::ReleaseOpenclRunEnv()
+{
+    ReleaseOpenclEnv( &gpuEnv );
 #ifdef SAL_WIN32
     FreeOpenclDll();
 #endif
@@ -76,38 +89,36 @@ int OpenclDevice::ReleaseOpenclRunEnv() {
 }
 ///////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////
-inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName)
+inline int OpenclDevice::AddKernelConfig( int kCount, const char *kName )
 {
-    strcpy(gpuEnv.mArrykernelNames[kCount], kName);
+    strcpy( gpuEnv.mArrykernelNames[kCount], kName );
     gpuEnv.mnKernelCount++;
     return 0;
 }
 
 int OpenclDevice::RegistOpenclKernel()
 {
-    if (!gpuEnv.mnIsUserCreated)
-        memset(&gpuEnv, 0, sizeof(gpuEnv));
+    if ( !gpuEnv.mnIsUserCreated )
+        memset( &gpuEnv, 0, sizeof(gpuEnv) );
 
     gpuEnv.mnFileCount = 0; //argc;
     gpuEnv.mnKernelCount = 0UL;
 
-    AddKernelConfig(0, (const char*) "hello");
-    AddKernelConfig(1, (const char*) "oclformula");
-    AddKernelConfig(2, (const char*) "oclFormulaMin");
-    AddKernelConfig(3, (const char*) "oclFormulaMax");
-    AddKernelConfig(4, (const char*) "oclFormulaSum");
-    AddKernelConfig(5, (const char*) "oclFormulaCount");
-    AddKernelConfig(6, (const char*) "oclFormulaAverage");
-    AddKernelConfig(7, (const char*) "oclFormulaSumproduct");
-    AddKernelConfig(8, (const char*) "oclFormulaMinverse");
-
-    AddKernelConfig(9,  (const char*) "oclSignedAdd");
-    AddKernelConfig(10, (const char*) "oclSignedSub");
-    AddKernelConfig(11, (const char*) "oclSignedMul");
-    AddKernelConfig(12, (const char*) "oclSignedDiv");
-    AddKernelConfig(13, (const char*) "oclAverageDelta");
-    AddKernelConfig(14, (const char*) "OclMaxDelta");
-    AddKernelConfig(15, (const char*) "OclMinDelta");
+    AddKernelConfig( 1, (const char*) "oclFormulaMin" );
+    AddKernelConfig( 2, (const char*) "oclFormulaMax" );
+    AddKernelConfig( 3, (const char*) "oclFormulaSum" );
+    AddKernelConfig( 4, (const char*) "oclFormulaCount" );
+    AddKernelConfig( 5, (const char*) "oclFormulaAverage" );
+    AddKernelConfig( 6, (const char*) "oclFormulaSumproduct" );
+    AddKernelConfig( 7, (const char*) "oclFormulaMinverse" );
+
+    AddKernelConfig( 8, (const char*) "oclSignedAdd" );
+    AddKernelConfig( 9, (const char*) "oclSignedSub" );
+    AddKernelConfig( 10, (const char*) "oclSignedMul" );
+    AddKernelConfig( 11, (const char*) "oclSignedDiv" );
+    AddKernelConfig( 12, (const char*) "oclAverageDelta" );
+    AddKernelConfig( 13, (const char*) "oclMaxDelta" );
+    AddKernelConfig( 14, (const char*) "oclMinDelta" );
 
     return 0;
 }
@@ -122,28 +133,36 @@ OpenclDevice::~OpenclDevice()
     //ReleaseOpenclRunEnv();
 }
 
-int OpenclDevice::SetKernelEnv(KernelEnv *envInfo)
+int OpenclDevice::SetKernelEnv( KernelEnv *envInfo )
 {
-    envInfo->mpkContext  = gpuEnv.mpContext;
+    envInfo->mpkContext = gpuEnv.mpContext;
     envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
-    envInfo->mpkProgram  = gpuEnv.mpArryPrograms[0];
+    envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
 
     return 1;
 }
 
-int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName)
+int OpenclDevice::CheckKernelName( KernelEnv *envInfo, const char *kernelName )
 {
     //printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount);
     int kCount;
-    for(kCount=0; kCount < gpuEnv.mnKernelCount; kCount++) {
-        if(strcasecmp(kernelName, gpuEnv.mArrykernelNames[kCount]) == 0) {
+    int nFlag = 0;
+    for ( kCount=0; kCount < gpuEnv.mnKernelCount; kCount++ )
+    {
+        if ( strcasecmp( kernelName, gpuEnv.mArrykernelNames[kCount]) == 0 )
+        {
+            nFlag = 1;
             printf("match %s kernel right\n",kernelName);
             break;
         }
     }
+    if ( !nFlag )
+    {
+        printf("can't find kernel: %s\n",kernelName);
+    }
     envInfo->mpkKernel = gpuEnv.mpArryKernels[kCount];
-    strcpy(envInfo->mckKernelName, kernelName);
-    if (envInfo == (KernelEnv *) NULL)
+    strcpy( envInfo->mckKernelName, kernelName );
+    if ( envInfo == (KernelEnv *) NULL )
     {
         printf("get err func and env\n");
         return 0;
@@ -151,33 +170,36 @@ int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName)
     return 1;
 }
 
-int OpenclDevice::ConvertToString(const char *filename, char **source)
+int OpenclDevice::ConvertToString( const char *filename, char **source )
 {
     int file_size;
     size_t result;
     FILE *file = NULL;
     file_size = 0;
     result = 0;
-    file = fopen(filename, "rb+");
+    file = fopen( filename, "rb+" );
     printf("open kernel file %s.\n",filename);
 
-    if (file != NULL) {
+    if ( file != NULL )
+    {
         printf("Open ok!\n");
-        fseek(file, 0, SEEK_END);
+        fseek( file, 0, SEEK_END );
 
-        file_size = ftell(file);
-        rewind(file);
-        *source = (char*) malloc(file_size + 1);
-        if (*source == (char*) NULL) {
+        file_size = ftell( file );
+        rewind( file );
+        *source = (char*) malloc( sizeof(char) * file_size + 1 );
+        if ( *source == (char*) NULL )
+        {
             return 0;
         }
         result = fread(*source, 1, file_size, file);
-        if (result != (size_t) file_size) {
-            free(*source);
+        if ( result != (size_t) file_size )
+        {
+            free( *source );
             return 0;
         }
         (*source)[file_size] = '\0';
-        fclose(file);
+        fclose( file );
 
         return 1;
     }
@@ -185,123 +207,134 @@ int OpenclDevice::ConvertToString(const char *filename, char **source)
     return 0;
 }
 
-int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle)
+int OpenclDevice::BinaryGenerated( const char * clFileName, FILE ** fhandle )
 {
     unsigned int i = 0;
-    cl_int status;
+    cl_int clStatus;
+    int status = 0;
     char *str = NULL;
     FILE *fd = NULL;
     cl_uint numDevices=0;
-    status = clGetDeviceIDs(gpuEnv.mpPlatformID, // platform
-                            CL_DEVICE_TYPE_ALL, // device_type
-                            0, // num_entries
-                            NULL, // devices ID
-                            &numDevices);
-    for (i = 0; i <numDevices; i++) {
+    clStatus = clGetDeviceIDs(gpuEnv.mpPlatformID, // platform
+                              CL_DEVICE_TYPE_GPU, // device_type
+                              0, // num_entries
+                              NULL, // devices ID
+                              &numDevices);
+    for ( i = 0; i < numDevices; i++ )
+    {
         char fileName[256] = { 0 }, cl_name[128] = { 0 };
-        if (gpuEnv.mpArryDevsID[i] != 0) {
+        if ( gpuEnv.mpArryDevsID[i] != 0 )
+        {
             char deviceName[1024];
-            status = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL);
-            CHECK_OPENCL(status);
-            str = (char*) strstr(clFileName, (char*) ".cl");
-            memcpy(cl_name, clFileName, str - clFileName);
+            clStatus = clGetDeviceInfo( gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL );
+            CHECK_OPENCL( clStatus, "clGetDeviceInfo" );
+            str = (char*) strstr( clFileName, (char*) ".cl" );
+            memcpy( cl_name, clFileName, str - clFileName );
             cl_name[str - clFileName] = '\0';
-            sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
-            fd = fopen(fileName, "rb");
-            status = (fd != NULL) ? 1 : 0;
-            }
+            sprintf( fileName, "./%s-%s.bin", cl_name, deviceName );
+            fd = fopen( fileName, "rb" );
+            status = ( fd != NULL ) ? 1 : 0;
         }
-        if (fd != NULL) {
-            *fhandle = fd;
-            }
-
-        return status;
+    }
+    if ( fd != NULL )
+    {
+        *fhandle = fd;
+    }
+    return status;
 
 }
 
-int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary,
-        size_t numBytes)
+int OpenclDevice::WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes )
 {
     FILE *output = NULL;
-    output = fopen(fileName, "wb");
-    if (output == NULL) {
+    output = fopen( fileName, "wb" );
+    if ( output == NULL )
+    {
         return 0;
     }
 
-    fwrite(birary, 1, numBytes, output);
-    fclose(output);
+    fwrite( birary, sizeof(char), numBytes, output );
+    fclose( output );
 
     return 1;
 
 }
 
-int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
-                                             const char * clFileName)
+int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * clFileName )
 {
-     unsigned int i = 0;
-    cl_int status;
-    size_t *binarySizes;
-    cl_uint numDevices;
+    unsigned int i = 0;
+    cl_int clStatus;
+    size_t *binarySizes, numDevices;
     cl_device_id *mpArryDevsID;
     char **binaries, *str = NULL;
 
-    status = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
-            sizeof(numDevices), &numDevices, NULL);
-    CHECK_OPENCL(status)
+    clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
+                   sizeof(numDevices), &numDevices, NULL );
+    CHECK_OPENCL( clStatus, "clGetProgramInfo" );
 
-    mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
-    if (mpArryDevsID == NULL) {
+    mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) * numDevices );
+    if ( mpArryDevsID == NULL )
+    {
         return 0;
     }
     /* grab the handles to all of the devices in the program. */
-    status = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
-            sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL);
-    CHECK_OPENCL(status)
+    clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
+                   sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL );
+    CHECK_OPENCL( clStatus, "clGetProgramInfo" );
 
     /* figure out the sizes of each of the binaries. */
-    binarySizes = (size_t*) malloc(sizeof(size_t) * numDevices);
+    binarySizes = (size_t*) malloc( sizeof(size_t) * numDevices );
 
-    status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
-            sizeof(size_t) * numDevices, binarySizes, NULL);
-    CHECK_OPENCL(status)
+    clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
+                   sizeof(size_t) * numDevices, binarySizes, NULL );
+    CHECK_OPENCL( clStatus, "clGetProgramInfo" );
 
     /* copy over all of the generated binaries. */
-    binaries = (char**) malloc(sizeof(char *) * numDevices);
-    if (binaries == NULL) {
+    binaries = (char**) malloc( sizeof(char *) * numDevices );
+    if ( binaries == NULL )
+    {
         return 0;
     }
 
-    for (i = 0; i < numDevices; i++) {
-        if (binarySizes[i] != 0) {
-            binaries[i] = (char*) malloc(binarySizes[i]);
-            if (binaries[i] == NULL) {
+    for ( i = 0; i < numDevices; i++ )
+    {
+        if ( binarySizes[i] != 0 )
+        {
+            binaries[i] = (char*) malloc( sizeof(char) * binarySizes[i] );
+            if ( binaries[i] == NULL )
+            {
                 return 0;
             }
-        } else {
+        }
+        else
+        {
             binaries[i] = NULL;
         }
     }
 
-    status = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
-            sizeof(char *) * numDevices, binaries, NULL);
-    CHECK_OPENCL(status)
+    clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
+                   sizeof(char *) * numDevices, binaries, NULL );
+    CHECK_OPENCL(clStatus,"clGetProgramInfo");
 
     /* dump out each binary into its own separate file. */
-    for (i = 0; i < numDevices; i++) {
+    for ( i = 0; i < numDevices; i++ )
+    {
         char fileName[256] = { 0 }, cl_name[128] = { 0 };
 
-        if (binarySizes[i] != 0) {
+        if ( binarySizes[i] != 0 )
+        {
             char deviceName[1024];
-            status = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
-                    sizeof(deviceName), deviceName, NULL);
-            CHECK_OPENCL(status)
+            clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
+                           sizeof(deviceName), deviceName, NULL);
+            CHECK_OPENCL( clStatus, "clGetDeviceInfo" );
 
-            str = (char*) strstr(clFileName, (char*) ".cl");
-            memcpy(cl_name, clFileName, str - clFileName);
+            str = (char*) strstr( clFileName, (char*) ".cl" );
+            memcpy( cl_name, clFileName, str - clFileName );
             cl_name[str - clFileName] = '\0';
-            sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
+            sprintf( fileName, "./%s-%s.bin", cl_name, deviceName );
 
-            if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) {
+            if ( !WriteBinaryToFile( fileName, binaries[i], binarySizes[i] ) )
+            {
                 printf("opencl-wrapper: write binary[%s] failds\n", fileName);
                 return 0;
             } //else
@@ -310,110 +343,121 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
     }
 
     // Release all resouces and memory
-    for (i = 0; i < numDevices; i++) {
-        if (binaries[i] != NULL) {
-            free(binaries[i]);
+    for ( i = 0; i < numDevices; i++ )
+    {
+        if ( binaries[i] != NULL )
+        {
+            free( binaries[i] );
             binaries[i] = NULL;
         }
     }
 
-    if (binaries != NULL) {
-        free(binaries);
+    if ( binaries != NULL )
+    {
+        free( binaries );
         binaries = NULL;
     }
 
-    if (binarySizes != NULL) {
-        free(binarySizes);
+    if ( binarySizes != NULL )
+    {
+        free( binarySizes );
         binarySizes = NULL;
     }
 
-    if (mpArryDevsID != NULL) {
-        free(mpArryDevsID);
+    if ( mpArryDevsID != NULL )
+    {
+        free( mpArryDevsID );
         mpArryDevsID = NULL;
     }
     return 1;
 }
 
-int OpenclDevice::InitOpenclAttr(OpenCLEnv * env)
+int OpenclDevice::InitOpenclAttr( OpenCLEnv * env )
 {
-    if (gpuEnv.mnIsUserCreated)
+    if ( gpuEnv.mnIsUserCreated )
         return 1;
 
-    gpuEnv.mpContext    = env->mpOclContext;
+    gpuEnv.mpContext = env->mpOclContext;
     gpuEnv.mpPlatformID = env->mpOclPlatformID;
-    gpuEnv.mpDevID        = env->mpOclDevsID;
-    gpuEnv.mpCmdQueue    = env->mpOclCmdQueue;
+    gpuEnv.mpDevID = env->mpOclDevsID;
+    gpuEnv.mpCmdQueue = env->mpOclCmdQueue;
 
     gpuEnv.mnIsUserCreated = 1;
 
     return 0;
 }
 
-int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env)
+int OpenclDevice::CreateKernel( char * kernelname, KernelEnv * env )
 {
-    int status;
+    int clStatus;
 
-    env->mpkKernel   = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status);
-    env->mpkContext  = gpuEnv.mpContext;
+    env->mpkKernel = clCreateKernel( gpuEnv.mpArryPrograms[0], kernelname, &clStatus );
+    env->mpkContext = gpuEnv.mpContext;
     env->mpkCmdQueue = gpuEnv.mpCmdQueue;
-    return status != CL_SUCCESS ? 1 : 0;
+    return clStatus != CL_SUCCESS ? 1 : 0;
 }
 
-int OpenclDevice::ReleaseKernel(KernelEnv * env)
+int OpenclDevice::ReleaseKernel( KernelEnv * env )
 {
-    int status = clReleaseKernel(env->mpkKernel);
-    return status != CL_SUCCESS ? 1 : 0;
+    int clStatus = clReleaseKernel( env->mpkKernel );
+    return clStatus != CL_SUCCESS ? 1 : 0;
 }
 
-int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo)
+int OpenclDevice::ReleaseOpenclEnv( GPUEnv *gpuInfo )
 {
     int i = 0;
-    int status = 0;
+    int clStatus = 0;
 
-    if (!isInited) {
+    if ( !isInited )
+    {
         return 1;
     }
 
-    for (i = 0; i < gpuEnv.mnFileCount; i++) {
-        if (gpuEnv.mpArryPrograms[i]) {
-            status = clReleaseProgram(gpuEnv.mpArryPrograms[i]);
-            CHECK_OPENCL(status)
+    for ( i = 0; i < gpuEnv.mnFileCount; i++ )
+    {
+        if ( gpuEnv.mpArryPrograms[i] )
+        {
+            clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] );
+            CHECK_OPENCL( clStatus, "clReleaseProgram" );
             gpuEnv.mpArryPrograms[i] = NULL;
         }
     }
-    if (gpuEnv.mpCmdQueue) {
-        clReleaseCommandQueue(gpuEnv.mpCmdQueue);
+    if ( gpuEnv.mpCmdQueue )
+    {
+        clReleaseCommandQueue( gpuEnv.mpCmdQueue );
         gpuEnv.mpCmdQueue = NULL;
     }
-    if (gpuEnv.mpContext) {
-        clReleaseContext(gpuEnv.mpContext);
+    if ( gpuEnv.mpContext )
+    {
+        clReleaseContext( gpuEnv.mpContext );
         gpuEnv.mpContext = NULL;
     }
     isInited = 0;
     gpuInfo->mnIsUserCreated = 0;
-    free(gpuInfo->mpArryDevsID);
+    free( gpuInfo->mpArryDevsID );
     return 1;
 }
 
-int OpenclDevice::RunKernelWrapper(cl_kernel_function function,
-                                   const char * kernelName, void **usrdata)
+int OpenclDevice::RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata )
 {
     printf("oclwrapper:RunKernel_wrapper...\n");
-    if (RegisterKernelWrapper(kernelName, function) != 1) {
-        fprintf(stderr,
-                "Error:RunKernel_wrapper:RegisterKernelWrapper fail!\n");
+    if ( RegisterKernelWrapper( kernelName, function ) != 1 )
+    {
+        fprintf(stderr, "Error:RunKernel_wrapper:RegisterKernelWrapper fail!\n");
         return -1;
     }
-    return (RunKernel(kernelName, usrdata));
+    return ( RunKernel( kernelName, usrdata ) );
 }
 
-int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached,
-                                    const char * clFileName)
+int OpenclDevice::CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName )
 {
     int i;
-    for (i = 0; i < gpuEnvCached->mnFileCount; i++) {
-        if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) {
-            if (gpuEnvCached->mpArryPrograms[i] != NULL) {
+    for ( i = 0; i < gpuEnvCached->mnFileCount; i++ )
+    {
+        if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
+        {
+            if ( gpuEnvCached->mpArryPrograms[i] != NULL )
+            {
                 return 1;
             }
         }
@@ -422,8 +466,9 @@ int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached,
     return 0;
 }
 
-int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
-    cl_int status;
+int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
+{
+    cl_int clStatus;
     size_t length;
     char *buildLog = NULL, *binary;
     const char *source;
@@ -434,7 +479,8 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
     FILE *fd, *fd1;
     const char* filename = "kernel.cl";
     fprintf(stderr, "CompileKernelFile ... \n");
-    if (CachedOfKernerPrg(gpuInfo, filename) == 1) {
+    if ( CachedOfKernerPrg(gpuInfo, filename) == 1 )
+    {
         return 1;
     }
 
@@ -442,133 +488,156 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
 
     source = kernel_src;
 
-    source_size[0] = strlen(source);
+    source_size[0] = strlen( source );
     binaryExisted = 0;
-    if ((binaryExisted = BinaryGenerated(filename, &fd)) == 1) {
-        status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
-                sizeof(numDevices), &numDevices, NULL);
-        CHECK_OPENCL(status)
+    if ( ( binaryExisted = BinaryGenerated( filename, &fd ) ) == 1 )
+    {
+        clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
+                       sizeof(numDevices), &numDevices, NULL );
+        CHECK_OPENCL( clStatus, "clGetContextInfo" );
 
-        mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
-        if (mpArryDevsID == NULL) {
+        mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) * numDevices );
+        if ( mpArryDevsID == NULL )
+        {
             return 0;
         }
 
         b_error = 0;
         length = 0;
-        b_error |= fseek(fd, 0, SEEK_END) < 0;
-        b_error |= (length = ftell(fd)) <= 0;
-        b_error |= fseek(fd, 0, SEEK_SET) < 0;
-        if (b_error) {
+        b_error |= fseek( fd, 0, SEEK_END ) < 0;
+        b_error |= ( length = ftell(fd) ) <= 0;
+        b_error |= fseek( fd, 0, SEEK_SET ) < 0;
+        if ( b_error )
+        {
             return 0;
         }
 
-        binary = (char*) malloc(length);
-        if (!binary) {
+        binary = (char*) malloc( length + 2 );
+        if ( !binary )
+        {
             return 0;
         }
 
-        memset(binary, 0, length);
-        b_error |= fread(binary, 1, length, fd) != length;
+        memset( binary, 0, length + 2 );
+        b_error |= fread( binary, 1, length, fd ) != length;
+        if ( binary[length - 1] != '\n' )
+        {
+            binary[length++] = '\n';
+        }
 
-        fclose(fd);
+        fclose( fd );
         fd = NULL;
         // grab the handles to all of the devices in the context.
-        status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES,
-                sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL);
-        CHECK_OPENCL(status)
+        clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
+                       sizeof( cl_device_id ) * numDevices, mpArryDevsID, NULL );
+        CHECK_OPENCL( clStatus, "clGetContextInfo" );
 
         fprintf(stderr, "Create kernel from binary\n");
-        gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary(gpuInfo->mpContext,
-                numDevices, mpArryDevsID, &length, (const unsigned char**) &binary,
-                &binary_status, &status);
-        CHECK_OPENCL(status)
+        gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
+                                           mpArryDevsID, &length, (const unsigned char**) &binary,
+                                           &binary_status, &clStatus );
+        CHECK_OPENCL( clStatus, "clCreateProgramWithBinary" );
 
-        free(binary);
-        free(mpArryDevsID);
+        free( binary );
+        free( mpArryDevsID );
         mpArryDevsID = NULL;
-    } else {
+    }
+    else
+    {
         // create a CL program using the kernel source
         fprintf(stderr, "Create kernel from source\n");
-        gpuEnv.mpArryPrograms[idx] = clCreateProgramWithSource(gpuEnv.mpContext,
-                1, &source, source_size, &status);
-        CHECK_OPENCL(status);
+        gpuEnv.mpArryPrograms[idx] = clCreateProgramWithSource( gpuEnv.mpContext, 1, &source,
+                                         source_size, &clStatus);
+        CHECK_OPENCL( clStatus, "clCreateProgramWithSource" );
     }
 
-    if (gpuInfo->mpArryPrograms[idx] == (cl_program) NULL) {
+    if ( gpuInfo->mpArryPrograms[idx] == (cl_program) NULL )
+    {
         return 0;
     }
 
     //char options[512];
     // create a cl program executable for all the devices specified
     printf("BuildProgram.\n");
-    if (!gpuInfo->mnIsUserCreated) {
-        status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
-                                buildOption, NULL, NULL);
-    } else {
-        status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
-                                buildOption, NULL, NULL);
+    if (!gpuInfo->mnIsUserCreated)
+    {
+        clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
+                       buildOption, NULL, NULL);
+    }
+    else
+    {
+        clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
+                       buildOption, NULL, NULL);
     }
 
-    if (status != CL_SUCCESS) {
+    if ( clStatus != CL_SUCCESS )
+    {
         printf ("BuildProgram error!\n");
-        if (!gpuInfo->mnIsUserCreated) {
-            status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
-                                           gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
-                                           &length);
-        } else {
-            status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
-                                           gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
+        if ( !gpuInfo->mnIsUserCreated )
+        {
+            clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
+                           CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+        }
+        else
+        {
+            clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
+                           CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
         }
-        if (status != CL_SUCCESS) {
+        if ( clStatus != CL_SUCCESS )
+        {
             printf("opencl create build log fail\n");
             return 0;
         }
-        buildLog = (char*) malloc(length);
-        if (buildLog == (char*) NULL) {
+        buildLog = (char*) malloc( length );
+        if ( buildLog == (char*) NULL )
+        {
             return 0;
         }
-        if (!gpuInfo->mnIsUserCreated) {
-            status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
-                    gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, length,
-                    buildLog, &length);
-        } else {
-            status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
-                    gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, length, buildLog,
-                    &length);
+        if ( !gpuInfo->mnIsUserCreated )
+        {
+            clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
+                           CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
         }
-        if (status != CL_SUCCESS) {
+        else
+        {
+            clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
+                           CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
+        }
+        if ( clStatus != CL_SUCCESS )
+        {
             printf("opencl program build info fail\n");
             return 0;
         }
 
-        fd1 = fopen("kernel-build.log", "w+");
-        if (fd1 != NULL) {
-            fwrite(buildLog, 1, length, fd1);
-            fclose(fd1);
+        fd1 = fopen( "kernel-build.log", "w+" );
+        if ( fd1 != NULL )
+        {
+            fwrite( buildLog, sizeof(char), length, fd1 );
+            fclose( fd1 );
         }
 
-        free(buildLog);
+        free( buildLog );
         return 0;
     }
 
-    strcpy(gpuEnv.mArryKnelSrcFile[idx], filename);
+    strcpy( gpuEnv.mArryKnelSrcFile[idx], filename );
 
-    if (binaryExisted == 0)
-        GeneratBinFromKernelSource(gpuEnv.mpArryPrograms[idx], filename);
+    if ( binaryExisted == 0 )
+        GeneratBinFromKernelSource( gpuEnv.mpArryPrograms[idx], filename );
 
     gpuInfo->mnFileCount += 1;
 
     return 1;
-
-
 }
-int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName,
-        KernelEnv *env, cl_kernel_function *function) {
-    int i; //,program_idx ;
+
+int OpenclDevice::GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function)
+{
+    int i;
     //printf("----------------OpenclDevice::GetKernelEnvAndFunc\n");
-    for (i = 0; i < gpuEnv.mnKernelCount; i++) {
-        if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0) {
+    for ( i = 0; i < gpuEnv.mnKernelCount; i++ )
+    {
+        if ( strcasecmp( kernelName, gpuEnv.mArrykernelNames[i]) == 0 )
+        {
             env->mpkContext = gpuEnv.mpContext;
             env->mpkCmdQueue = gpuEnv.mpCmdQueue;
             env->mpkProgram = gpuEnv.mpArryPrograms[0];
@@ -580,48 +649,68 @@ int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName,
     return 0;
 }
 
-int OpenclDevice::RunKernel(const char *kernelName, void **userdata) {
+int OpenclDevice::RunKernel( const char *kernelName, void **userdata)
+{
     KernelEnv kEnv;
-
     cl_kernel_function function;
-
     int status;
 
-    memset(&kEnv, 0, sizeof(KernelEnv));
-    status = GetKernelEnvAndFunc(kernelName, &kEnv, &function);
-    strcpy(kEnv.mckKernelName, kernelName);
-    if (status == 1) {
-        if (&kEnv == (KernelEnv *) NULL
-                || &function == (cl_kernel_function *) NULL) {
+    memset( &kEnv, 0, sizeof( KernelEnv ) );
+    status = GetKernelEnvAndFunc( kernelName, &kEnv, &function );
+    strcpy( kEnv.mckKernelName, kernelName );
+    if ( status == 1 )
+    {
+        if ( &kEnv == (KernelEnv *) NULL || &function == (cl_kernel_function *) NULL)
             return 0;
-        }
-        return (function(userdata, &kEnv));
+        return ( function( userdata, &kEnv ) );
     }
     return 0;
 }
 
-int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles)
+int OpenclDevice::InitOpenclRunEnv( int argc )
 {
     int status = 0;
-    if (MAX_CLKERNEL_NUM <= 0) {
+    if ( MAX_CLKERNEL_NUM <= 0 )
+    {
         return 1;
     }
-    if ((argc > MAX_CLFILE_NUM) || (argc < 0)) {
+    if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
         return 1;
-    }
 
-    if (!isInited) {
+    if ( !isInited )
+    {
         RegistOpenclKernel();
         //initialize devices, context, comand_queue
-        status = InitOpenclRunEnv(&gpuEnv);
-        if (status) {
+        status = InitOpenclRunEnv( &gpuEnv );
+        if ( status )
+        {
             printf("init_opencl_env failed.\n");
             return 1;
         }
         printf("init_opencl_env successed.\n");
         //initialize program, kernelName, kernelCount
-        status = CompileKernelFile( &gpuEnv, buildOptionKernelfiles);
-        if (status == 0 || gpuEnv.mnKernelCount == 0) {
+        if( getenv( "SC_FLOAT" ) )
+        {
+            gpuEnv.mnKhrFp64Flag = 0;
+            gpuEnv.mnAmdFp64Flag = 0;
+        }
+        if( gpuEnv.mnKhrFp64Flag )
+        {
+            printf("----use khr double type in kernel----\n");
+            status = CompileKernelFile( &gpuEnv, "-D KHR_DP_EXTENSION -Dfp_t=double" );
+        }
+        else if( gpuEnv.mnAmdFp64Flag )
+        {
+            printf("----use amd double type in kernel----\n");
+            status = CompileKernelFile( &gpuEnv, "-D AMD_DP_EXTENSION -Dfp_t=double" );
+        }
+        else
+        {
+            printf("----use float type in kernel----\n");
+            status = CompileKernelFile( &gpuEnv, "-Dfp_t=float" );
+        }
+        if ( status == 0 || gpuEnv.mnKernelCount == 0 )
+        {
             printf("CompileKernelFile failed.\n");
             return 1;
         }
@@ -631,10 +720,10 @@ int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles)
     return 0;
 }
 
-int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
+int OpenclDevice::InitOpenclRunEnv( GPUEnv *gpuInfo )
 {
     size_t length;
-    cl_int status;
+    cl_int clStatus;
     cl_uint numPlatforms, numDevices;
     cl_platform_id *platforms;
     cl_context_properties cps[3];
@@ -643,30 +732,36 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
 
     // Have a look at the available platforms.
 
-    if (!gpuInfo->mnIsUserCreated) {
-        status = clGetPlatformIDs(0, NULL, &numPlatforms);
-        if (status != CL_SUCCESS) {
+    if ( !gpuInfo->mnIsUserCreated )
+    {
+        clStatus = clGetPlatformIDs( 0, NULL, &numPlatforms );
+        if ( clStatus != CL_SUCCESS )
+        {
             return 1;
         }
         gpuInfo->mpPlatformID = NULL;
 
-        if (0 < numPlatforms) {
-            platforms = (cl_platform_id*) malloc(
-                    numPlatforms * sizeof(cl_platform_id));
-            if (platforms == (cl_platform_id*) NULL) {
+        if ( 0 < numPlatforms )
+        {
+            platforms = (cl_platform_id*) malloc( numPlatforms * sizeof( cl_platform_id ) );
+            if ( platforms == (cl_platform_id*) NULL )
+            {
                 return 1;
             }
-            status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+            clStatus = clGetPlatformIDs( numPlatforms, platforms, NULL );
 
-            if (status != CL_SUCCESS) {
+            if ( clStatus != CL_SUCCESS )
+            {
                 return 1;
             }
 
-            for (i = 0; i < numPlatforms; i++) {
-                status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
-                        sizeof(platformName), platformName, NULL);
+            for ( i = 0; i < numPlatforms; i++ )
+            {
+                clStatus = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
+                    sizeof( platformName ), platformName, NULL );
 
-                if (status != CL_SUCCESS) {
+                if ( clStatus != CL_SUCCESS )
+                {
                     return 1;
                 }
                 gpuInfo->mpPlatformID = platforms[i];
@@ -676,96 +771,105 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
                 {
                     gpuInfo->mpPlatformID = platforms[i];
 
-                    status = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
-                                            CL_DEVICE_TYPE_ALL,    // device_type
-                                            0,                       // num_entries
-                                            NULL,                   // devices
-                                            &numDevices);
+                    clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
+                                              CL_DEVICE_TYPE_GPU,    // device_type
+                                              0,                     // num_entries
+                                              NULL,                  // devices
+                                              &numDevices);
 
-                    if (status != CL_SUCCESS) {
+                    if ( clStatus != CL_SUCCESS )
                         continue;
-                    }
 
-                    if (numDevices) {
+                    if ( numDevices )
                         break;
-                    }
                 }
             }
-            if(status!=CL_SUCCESS)
+            if ( clStatus != CL_SUCCESS )
                 return 1;
-            free(platforms);
+            free( platforms );
         }
-        if (NULL == gpuInfo->mpPlatformID) {
+        if ( NULL == gpuInfo->mpPlatformID )
             return 1;
-        }
 
         // Use available platform.
-
         cps[0] = CL_CONTEXT_PLATFORM;
         cps[1] = (cl_context_properties) gpuInfo->mpPlatformID;
         cps[2] = 0;
         // Check for GPU.
         gpuInfo->mDevType = CL_DEVICE_TYPE_GPU;
-        gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType, NULL,
-                NULL, &status);
+        gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
 
-        // If no GPU, check for CPU.
-        if ((gpuInfo->mpContext == (cl_context) NULL)
-                || (status != CL_SUCCESS)) {
+        if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
+        {
             gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
-            gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType,
-                    NULL, NULL, &status);
+            gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
         }
-
-        // If no GPU or CPU, check for a "default" type.
-        if ((gpuInfo->mpContext == (cl_context) NULL)
-                || (status != CL_SUCCESS)) {
+        if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
+        {
             gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT;
-            gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType,
-                    NULL, NULL, &status);
+            gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
         }
-        if ((gpuInfo->mpContext == (cl_context) NULL)
-                || (status != CL_SUCCESS)) {
+        if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
             return 1;
-        }
         // Detect OpenCL devices.
         // First, get the size of device list data
-        status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0,
-                NULL, &length);
-        if ((status != CL_SUCCESS) || (length == 0)) {
+        clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0, NULL, &length );
+        if ( ( clStatus != CL_SUCCESS ) || ( length == 0 ) )
             return 1;
-        }
         // Now allocate memory for device list based on the size we got earlier
-        gpuInfo->mpArryDevsID = (cl_device_id*) malloc(length);
-        if (gpuInfo->mpArryDevsID == (cl_device_id*) NULL) {
+        gpuInfo->mpArryDevsID = (cl_device_id*) malloc( length );
+        if ( gpuInfo->mpArryDevsID == (cl_device_id*) NULL )
             return 1;
-        }
         // Now, get the device list data
-        status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
-                gpuInfo->mpArryDevsID, NULL);
-        if (status != CL_SUCCESS) {
+        clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
+                       gpuInfo->mpArryDevsID, NULL );
+        if ( clStatus != CL_SUCCESS )
             return 1;
-        }
 
         // Create OpenCL command queue.
-        gpuInfo->mpCmdQueue = clCreateCommandQueue(gpuInfo->mpContext,
-                gpuInfo->mpArryDevsID[0], 0, &status);
+        gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpArryDevsID[0], 0, &clStatus );
 
-        if (status != CL_SUCCESS) {
+        if ( clStatus != CL_SUCCESS )
             return 1;
-        }
     }
 
-    return 0;
+    clStatus = clGetCommandQueueInfo( gpuInfo->mpCmdQueue, CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL );
+    // Check device extensions for double type
+    size_t aDevExtInfoSize = 0;
+
+    clStatus = clGetDeviceInfo( gpuInfo->mpArryDevsID[0], CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
+    CHECK_OPENCL( clStatus, "clGetDeviceInfo" );
+
+    char *aExtInfo = new char[aDevExtInfoSize];
+
+    clStatus = clGetDeviceInfo( gpuInfo->mpArryDevsID[0], CL_DEVICE_EXTENSIONS,
+                   sizeof(char) * aDevExtInfoSize, aExtInfo, NULL);
+    CHECK_OPENCL( clStatus, "clGetDeviceInfo" );
+
+    gpuInfo->mnKhrFp64Flag = 0;
+    gpuInfo->mnAmdFp64Flag = 0;
 
+    if ( strstr( aExtInfo, "cl_khr_fp64" ) )
+    {
+        gpuInfo->mnKhrFp64Flag = 1;
+    }
+    else
+    {
+        // Check if cl_amd_fp64 extension is supported
+        if ( strstr( aExtInfo, "cl_amd_fp64" ) )
+            gpuInfo->mnAmdFp64Flag = 1;
+    }
+    delete []aExtInfo;
+
+    return 0;
 }
-int OpenclDevice::RegisterKernelWrapper(const char *kernelName,cl_kernel_function function)
+int OpenclDevice::RegisterKernelWrapper( const char *kernelName, cl_kernel_function function )
 {
     int i;
     //printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.mnKernelCount);
-    for (i = 0; i < gpuEnv.mnKernelCount; i++)
+    for ( i = 0; i < gpuEnv.mnKernelCount; i++ )
     {
-        if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0)
+        if ( strcasecmp( kernelName, gpuEnv.mArrykernelNames[i]) == 0 )
         {
             gpuEnv.mpArryKnelFuncs[i] = function;
             return 1;
@@ -774,190 +878,22 @@ int OpenclDevice::RegisterKernelWrapper(const char *kernelName,cl_kernel_functio
     return 0;
 }
 
-
-void OpenclDevice::SetOpenclState(int state)
+void OpenclDevice::SetOpenclState( int state )
 {
-     //printf("OpenclDevice::setOpenclState...\n");
-     isInited = state;
+    //printf("OpenclDevice::setOpenclState...\n");
+    isInited = state;
 }
 
 int OpenclDevice::GetOpenclState()
 {
     return isInited;
 }
-//ocldbg
-
-int OclFormulax(void ** usrdata, KernelEnv *env) {
-    fprintf(stderr, "In OpenclDevice,...Formula_proc\n");
-    cl_int clStatus;
-    int status;
-    size_t global_work_size[1];
-    float tdata[NUM];
-
-    double *data = (double *) usrdata[0];
-    const formulax type = *((const formulax *) usrdata[1]);
-    double ret = 0.0;
-
-    for (int i = 0; i < NUM; i++) {
-        tdata[i] = (float) data[i];
-    }
-
-    env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus);
-    //printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
-    //fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
-    int size = NUM;
-
-    cl_mem formula_data = clCreateBuffer(env->mpkContext,
-            (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
-            size * sizeof(float), (void *) tdata, &clStatus);
-    //fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
-
-    status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem),
-            (void *) &formula_data);
-    CHECK_OPENCL(status)
-    status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int),
-            (void *) &type);
-    CHECK_OPENCL(status)
-
-    global_work_size[0] = size;
-    //fprintf(stderr, "\nIn OpenclDevice,...after global_work_size\n");
-    //PPAStartCpuEvent(ppa_proc);
-
-    while (global_work_size[0] != 1) {
-        global_work_size[0] = global_work_size[0] / 2;
-        status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1,
-                NULL, global_work_size, NULL, 0, NULL, NULL);
-        CHECK_OPENCL(status)
-
-    }
-    //fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
-    status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0,
-            sizeof(float), (void *) &tdata, 0, NULL, NULL);
-    CHECK_OPENCL(status)
-    status = clFinish(env->mpkCmdQueue);
-    CHECK_OPENCL(status)
-
-    //PPAStopCpuEvent(ppa_proc);
-    //fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
-    status = clReleaseKernel(env->mpkKernel);
-    CHECK_OPENCL(status)
-    status = clReleaseMemObject(formula_data);
-    CHECK_OPENCL(status)
-
-    if (type == AVG)
-        ret = (double) tdata[0] / NUM;
-    else
-        ret = (double) tdata[0];
-
-    printf("size = %d ret = %f.\n\n", NUM, ret);
-
-    return 0;
-}
-
-int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
-
-    fprintf(stderr, "In OclFormulaxDll...\n");
-    cl_int clStatus;
-    int status;
-    size_t global_work_size[1];
-    float tdata[NUM];
-
-    double *data = (double *) usrdata[0];
-    const formulax type = *((const formulax *) usrdata[1]);
-    double ret = 0.0;
-
-    for (int i = 0; i < NUM; i++) {
-        tdata[i] = (float) data[i];
-    }
-
-    env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus);
-    //printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
-    //fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
-    int size = NUM;
-
-    cl_mem formula_data = clCreateBuffer(env->mpkContext,
-            (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
-            size * sizeof(float), (void *) tdata, &clStatus);
-    //fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
-
-    status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem),
-            (void *) &formula_data);
-    CHECK_OPENCL(status)
-    status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int),
-            (void *) &type);
-    CHECK_OPENCL(status)
-
-    global_work_size[0] = size;
-    //fprintf(stderr, "\nIn OpenclDevice,...after global_work_size\n");
-    //PPAStartCpuEvent(ppa_proc);
-
-    while (global_work_size[0] != 1) {
-        global_work_size[0] = global_work_size[0] / 2;

... etc. - the rest is truncated


More information about the Libreoffice-commits mailing list