[Libreoffice-commits] core.git: Branch 'libreoffice-4-2' - sc/source

Kohei Yoshida kohei.yoshida at collabora.com
Sat Feb 15 18:35:36 CET 2014


 sc/source/core/data/dociter.cxx |  388 ++++++++++++++++++++++++++++------------
 1 file changed, 280 insertions(+), 108 deletions(-)

New commits:
commit ff53d15a6633b1188e97b8b15a71190f072afeea
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date:   Thu Feb 13 14:45:47 2014 -0500

    fdo#74558: Re-implement BinarySearch() to disregard empty cell blocks.
    
    The old code before the calc core refactoring took advantage of the old
    calc storage which did not store empty cells at all.  The new storage does
    "store" empty cell blocks, and it did negatively affect the binary search
    used for VLOOKUP.  The revised binary search code properly skips empty cell
    blocks so that it will work more like the old algorithm in 4.1.
    
    This change also fixes fdo#72348, which was caused by the same root cause.
    
    Change-Id: Ic20cc54d8dae26b62f5e9193cd21dad06ad10a3e
    (cherry picked from commit 377d749ff8211fa9e69d47a92810af9bc0652990)
    Reviewed-on: https://gerrit.libreoffice.org/8042
    Reviewed-by: Norbert Thiebaud <nthiebaud at gmail.com>
    Tested-by: Norbert Thiebaud <nthiebaud at gmail.com>

diff --git a/sc/source/core/data/dociter.cxx b/sc/source/core/data/dociter.cxx
index d93ee06..5758c52 100644
--- a/sc/source/core/data/dociter.cxx
+++ b/sc/source/core/data/dociter.cxx
@@ -62,6 +62,14 @@ void incBlock(std::pair<_Iter, size_t>& rPos)
 }
 
 template<typename _Iter>
+void decBlock(std::pair<_Iter, size_t>& rPos)
+{
+    // Move to the last element of the previous block.
+    --rPos.first;
+    rPos.second = rPos.first->size - 1;
+}
+
+template<typename _Iter>
 void incPos(std::pair<_Iter, size_t>& rPos)
 {
     if (rPos.second + 1 < rPos.first->size)
@@ -1434,6 +1442,204 @@ bool ScQueryCellIterator::FindEqualOrSortedLastInRange( SCCOL& nFoundCol,
     return (nFoundCol <= MAXCOL) && (nFoundRow <= MAXROW);
 }
 
+namespace {
+
+/**
+ * This class sequentially indexes non-empty cells in order, from the top of
+ * the block where the start row position is, to the bottom of the block
+ * where the end row position is.  It skips all empty blocks that may be
+ * present in between.
+ *
+ * The index value is an offset from the first element of the first block
+ * disregarding all empty cell blocks.
+ */
+class NonEmptyCellIndexer
+{
+    typedef std::map<size_t, sc::CellStoreType::const_iterator> BlockMapType;
+
+    BlockMapType maBlockMap;
+
+    const sc::CellStoreType& mrCells;
+    SCROW mnStartRow;
+    SCROW mnEndRow;
+
+    size_t mnLowIndex;
+    size_t mnHighIndex;
+
+    bool mbValid;
+
+public:
+
+    typedef std::pair<ScRefCellValue, SCROW> CellType;
+
+    /**
+     * @param rCells cell storage container
+     * @param nStartRow logical start row position
+     * @param nEndRow logical end row position, inclusive.
+     * @param bSkipTopStrBlock when true, skip all leading string cells.
+     */
+    NonEmptyCellIndexer(
+        const sc::CellStoreType& rCells, SCROW nStartRow, SCROW nEndRow, bool bSkipTopStrBlock ) :
+        mrCells(rCells), mnStartRow(nStartRow), mnEndRow(nEndRow), mnLowIndex(0), mnHighIndex(0), mbValid(true)
+    {
+        if (nEndRow < nStartRow)
+        {
+            mbValid = false;
+            return;
+        }
+
+        // Find the low position.
+
+        sc::CellStoreType::const_position_type aLoPos = mrCells.position(nStartRow);
+        if (aLoPos.first->type == sc::element_type_empty)
+            incBlock(aLoPos);
+
+        if (aLoPos.first == rCells.end())
+        {
+            mbValid = false;
+            return;
+        }
+
+        if (bSkipTopStrBlock)
+        {
+            // Skip all leading string or empty blocks.
+            while (aLoPos.first->type == sc::element_type_string ||
+                   aLoPos.first->type == sc::element_type_edittext ||
+                   aLoPos.first->type == sc::element_type_empty)
+            {
+                incBlock(aLoPos);
+                if (aLoPos.first == rCells.end())
+                {
+                    mbValid = false;
+                    return;
+                }
+            }
+        }
+
+        SCROW nFirstRow = aLoPos.first->position;
+        SCROW nLastRow = aLoPos.first->position + aLoPos.first->size - 1;
+
+        if (nFirstRow > nEndRow)
+        {
+            // Both start and end row positions are within the leading skipped
+            // blocks.
+            mbValid = false;
+            return;
+        }
+
+        // Calculate the index of the low position.
+        if (nFirstRow < nStartRow)
+            mnLowIndex = nStartRow - nFirstRow;
+        else
+        {
+            // Start row is within the skipped block(s). Set it to the first
+            // element of the low block.
+            mnLowIndex = 0;
+        }
+
+        if (nEndRow < nLastRow)
+        {
+            assert(nEndRow > nFirstRow);
+            mnHighIndex = nEndRow - nFirstRow;
+
+            maBlockMap.insert(BlockMapType::value_type(aLoPos.first->size, aLoPos.first));
+            return;
+        }
+
+        // Find the high position.
+
+        sc::CellStoreType::const_position_type aHiPos = mrCells.position(aLoPos.first, nEndRow);
+        if (aHiPos.first->type == sc::element_type_empty)
+        {
+            // Move to the last position of the previous block.
+            decBlock(aHiPos);
+
+            if (aHiPos.first == mrCells.begin())
+            {
+                mbValid = false;
+                return;
+            }
+        }
+
+        // Tag the start and end blocks, and all blocks in between in order
+        // but skip all empty blocks.
+
+        size_t nPos = 0;
+        sc::CellStoreType::const_iterator itBlk = aLoPos.first;
+        while (itBlk != aHiPos.first)
+        {
+            if (itBlk->type == sc::element_type_empty)
+            {
+                ++itBlk;
+                continue;
+            }
+
+            nPos += itBlk->size;
+            maBlockMap.insert(BlockMapType::value_type(nPos, itBlk));
+            ++itBlk;
+
+            if (itBlk->type == sc::element_type_empty)
+                ++itBlk;
+
+            assert(itBlk != mrCells.end());
+        }
+
+        assert(itBlk == aHiPos.first);
+        nPos += itBlk->size;
+        maBlockMap.insert(BlockMapType::value_type(nPos, itBlk));
+
+        // Calculate the high index.
+        BlockMapType::const_reverse_iterator ri = maBlockMap.rbegin();
+        mnHighIndex = ri->first;
+        mnHighIndex -= ri->second->size;
+        mnHighIndex += aHiPos.second;
+    }
+
+    sc::CellStoreType::const_position_type getPosition( size_t nIndex ) const
+    {
+        assert(mbValid);
+        assert(mnLowIndex <= nIndex);
+        assert(nIndex <= mnHighIndex);
+
+        sc::CellStoreType::const_position_type aRet(mrCells.end(), 0);
+
+        BlockMapType::const_iterator it = maBlockMap.upper_bound(nIndex);
+        if (it == maBlockMap.end())
+            return aRet;
+
+        sc::CellStoreType::const_iterator itBlk = it->second;
+        size_t nBlkIndex = it->first - itBlk->size; // index of the first element of the block.
+        assert(nBlkIndex <= nIndex);
+        assert(nIndex < it->first);
+
+        size_t nOffset = nIndex - nBlkIndex;
+        aRet.first = itBlk;
+        aRet.second = nOffset;
+        return aRet;
+    }
+
+    CellType getCell( size_t nIndex ) const
+    {
+        std::pair<ScRefCellValue, SCROW> aRet;
+        aRet.second = -1;
+
+        sc::CellStoreType::const_position_type aPos = getPosition(nIndex);
+        if (aPos.first == mrCells.end())
+            return aRet;
+
+        aRet.first = sc::toRefCell(aPos.first, aPos.second);
+        aRet.second = aPos.first->position + aPos.second;
+        return aRet;
+    }
+
+    size_t getLowIndex() const { return mnLowIndex; }
+
+    size_t getHighIndex() const { return mnHighIndex; }
+
+    bool isValid() const { return mbValid; }
+};
+
+}
 
 bool ScQueryCellIterator::BinarySearch()
 {
@@ -1446,9 +1652,6 @@ bool ScQueryCellIterator::BinarySearch()
     if (pCol->IsEmptyData())
         return false;
 
-    PositionType aHiPos, aLoPos;
-    ScRefCellValue aCell;
-
     CollatorWrapper* pCollator = (mpParam->bCaseSens ? ScGlobal::GetCaseCollator() :
         ScGlobal::GetCollator());
     SvNumberFormatter& rFormatter = *(pDoc->GetFormatTable());
@@ -1462,78 +1665,67 @@ bool ScQueryCellIterator::BinarySearch()
 
     nRow = mpParam->nRow1;
     if (mpParam->bHasHeader)
-        nRow++;
+        ++nRow;
 
-    aLoPos = pCol->maCells.position(nRow);
-    if (bFirstStringIgnore && aLoPos.first->type == sc::element_type_string)
+    ScRefCellValue aCell;
+    if (bFirstStringIgnore)
     {
-        OUString aCellStr;
-        sal_uLong nFormat = pCol->GetNumberFormat(toLogicalPos(aLoPos));
-        aCell = sc::toRefCell(aLoPos.first, aLoPos.second);
-        ScCellFormat::GetInputString(aCell, nFormat, aCellStr, rFormatter, pDoc);
-        sal_Int32 nTmp = pCollator->compareString(aCellStr, rEntry.GetQueryItem().maString.getString());
-        if ((rEntry.eOp == SC_LESS_EQUAL && nTmp > 0) ||
-                (rEntry.eOp == SC_GREATER_EQUAL && nTmp < 0) ||
-                (rEntry.eOp == SC_EQUAL && nTmp != 0))
-        {
-            // Skip the first string value at low point.
-            incPos(aLoPos);
+        sc::CellStoreType::const_position_type aPos = pCol->maCells.position(nRow);
+        if (aPos.first->type == sc::element_type_string || aPos.first->type == sc::element_type_edittext)
+        {
+            aCell = sc::toRefCell(aPos.first, aPos.second);
+            sal_uLong nFormat = pCol->GetNumberFormat(nRow);
+            OUString aCellStr;
+            ScCellFormat::GetInputString(aCell, nFormat, aCellStr, rFormatter, pDoc);
+            sal_Int32 nTmp = pCollator->compareString(aCellStr, rEntry.GetQueryItem().maString.getString());
+            if ((rEntry.eOp == SC_LESS_EQUAL && nTmp > 0) ||
+                    (rEntry.eOp == SC_GREATER_EQUAL && nTmp < 0) ||
+                    (rEntry.eOp == SC_EQUAL && nTmp != 0))
+                ++nRow;
         }
     }
 
-    aHiPos = pCol->maCells.position(mpParam->nRow2);
-    if (bAllStringIgnore)
-    {
-        // Skip all string cells, but never go past the high point.
-        if (aLoPos.first->type == sc::element_type_string)
-        {
-            if (aLoPos.first == pCol->maCells.end())
-                // This is the last block. Move to the last element in this block.
-                aLoPos.second = aLoPos.first->size - 1;
-            else
-                // Move to the next block.
-                incBlock(aLoPos);
-        }
+    NonEmptyCellIndexer aIndexer(pCol->maCells, nRow, mpParam->nRow2, bAllStringIgnore);
+    if (!aIndexer.isValid())
+        return false;
 
-        if (toLogicalPos(aLoPos) > toLogicalPos(aHiPos))
-            // Never go past the high point.
-            aLoPos = aHiPos;
-    }
+    size_t nLo = aIndexer.getLowIndex();
+    size_t nHi = aIndexer.getHighIndex();
+    NonEmptyCellIndexer::CellType aCellData;
 
     // Bookkeeping values for breaking up the binary search in case the data
     // range isn't strictly sorted.
-    PositionType aLastInRange = aLoPos;
-    PositionType aFirstLastInRange = aLastInRange;
+    size_t nLastInRange = nLo;
+    size_t nFirstLastInRange = nLastInRange;
     double fLastInRangeValue = bLessEqual ?
         -(::std::numeric_limits<double>::max()) :
             ::std::numeric_limits<double>::max();
     OUString aLastInRangeString;
     if (!bLessEqual)
         aLastInRangeString = OUString(sal_Unicode(0xFFFF));
-    if (aLastInRange.first != pCol->maCells.end())
+
+    aCellData = aIndexer.getCell(nLastInRange);
+    aCell = aCellData.first;
+    if (aCell.hasString())
     {
-        aCell = sc::toRefCell(aLastInRange.first, aLastInRange.second);
-        if (aCell.hasString())
-        {
-            sal_uLong nFormat = pCol->GetNumberFormat(toLogicalPos(aLastInRange));
-            OUString aStr;
-            ScCellFormat::GetInputString(aCell, nFormat, aStr, rFormatter, pDoc);
-            aLastInRangeString = aStr;
-        }
-        else
+        sal_uLong nFormat = pCol->GetNumberFormat(aCellData.second);
+        OUString aStr;
+        ScCellFormat::GetInputString(aCell, nFormat, aStr, rFormatter, pDoc);
+        aLastInRangeString = aStr;
+    }
+    else
+    {
+        switch (aCell.meType)
         {
-            switch (aCell.meType)
+            case CELLTYPE_VALUE :
+                fLastInRangeValue = aCell.mfValue;
+            break;
+            case CELLTYPE_FORMULA :
+                fLastInRangeValue = aCell.mpFormula->GetValue();
+            break;
+            default:
             {
-                case CELLTYPE_VALUE :
-                    fLastInRangeValue = aCell.mfValue;
-                break;
-                case CELLTYPE_FORMULA :
-                    fLastInRangeValue = aCell.mpFormula->GetValue();
-                break;
-                default:
-                {
-                    // added to avoid warnings
-                }
+                // added to avoid warnings
             }
         }
     }
@@ -1541,38 +1733,22 @@ bool ScQueryCellIterator::BinarySearch()
     sal_Int32 nRes = 0;
     bool bFound = false;
     bool bDone = false;
-    size_t nLogicalLow = toLogicalPos(aLoPos), nLogicalHigh = toLogicalPos(aHiPos);
-    while (nLogicalLow <= nLogicalHigh && !bDone)
+    while (nLo <= nHi && !bDone)
     {
-        size_t nMid = (nLogicalLow+nLogicalHigh)/2;
+        size_t nMid = (nLo+nHi)/2;
         size_t i = nMid;
-        if (i > nLogicalHigh)
+        if (i > nHi)
         {
             if (nMid > 0)
-                nLogicalHigh = nMid - 1;
+                nHi = nMid - 1;
             else
                 bDone = true;
             continue; // while
         }
 
-        bool bHaveRefCell = false;
-        PositionType aPos = pCol->maCells.position(i);
-        bool bStr;
-        switch (aPos.first->type)
-        {
-            case sc::element_type_formula:
-                aCell = sc::toRefCell(aPos.first, aPos.second);
-                bHaveRefCell = true;
-                bStr = aCell.hasString();
-                break;
-            case sc::element_type_string:
-            case sc::element_type_edittext:
-                bStr = true;
-                break;
-            default:
-                bStr = false;
-                break;
-        }
+        aCellData = aIndexer.getCell(i);
+        aCell = aCellData.first;
+        bool bStr = aCell.hasString();
         nRes = 0;
 
         // compares are content<query:-1, content>query:1
@@ -1580,16 +1756,12 @@ bool ScQueryCellIterator::BinarySearch()
         if (!bStr && !bByString)
         {
             double nCellVal;
-            if (!bHaveRefCell)
-                aCell = sc::toRefCell(aPos.first, aPos.second);
             switch (aCell.meType)
             {
                 case CELLTYPE_VALUE :
-                    nCellVal = aCell.mfValue;
-                    break;
                 case CELLTYPE_FORMULA :
-                    nCellVal = aCell.mpFormula->GetValue();
-                    break;
+                    nCellVal = aCell.getValue();
+                break;
                 default:
                     nCellVal = 0.0;
             }
@@ -1602,12 +1774,12 @@ bool ScQueryCellIterator::BinarySearch()
                     if (fLastInRangeValue < nCellVal)
                     {
                         fLastInRangeValue = nCellVal;
-                        aLastInRange = aPos;
+                        nLastInRange = i;
                     }
                     else if (fLastInRangeValue > nCellVal)
                     {
                         // not strictly sorted, continue with GetThis()
-                        aLastInRange = aFirstLastInRange;
+                        nLastInRange = nFirstLastInRange;
                         bDone = true;
                     }
                 }
@@ -1621,12 +1793,12 @@ bool ScQueryCellIterator::BinarySearch()
                     if (fLastInRangeValue > nCellVal)
                     {
                         fLastInRangeValue = nCellVal;
-                        aLastInRange = aPos;
+                        nLastInRange = i;
                     }
                     else if (fLastInRangeValue < nCellVal)
                     {
                         // not strictly sorted, continue with GetThis()
-                        aLastInRange = aFirstLastInRange;
+                        nLastInRange = nFirstLastInRange;
                         bDone = true;
                     }
                 }
@@ -1635,9 +1807,7 @@ bool ScQueryCellIterator::BinarySearch()
         else if (bStr && bByString)
         {
             OUString aCellStr;
-            sal_uLong nFormat = pCol->GetNumberFormat(i);
-            if (!bHaveRefCell)
-                aCell = sc::toRefCell(aPos.first, aPos.second);
+            sal_uLong nFormat = pCol->GetNumberFormat(aCellData.second);
             ScCellFormat::GetInputString(aCell, nFormat, aCellStr, rFormatter, pDoc);
 
             nRes = pCollator->compareString(aCellStr, rEntry.GetQueryItem().maString.getString());
@@ -1648,12 +1818,12 @@ bool ScQueryCellIterator::BinarySearch()
                 if (nTmp < 0)
                 {
                     aLastInRangeString = aCellStr;
-                    aLastInRange = aPos;
+                    nLastInRange = i;
                 }
                 else if (nTmp > 0)
                 {
                     // not strictly sorted, continue with GetThis()
-                    aLastInRange = aFirstLastInRange;
+                    nLastInRange = nFirstLastInRange;
                     bDone = true;
                 }
             }
@@ -1664,12 +1834,12 @@ bool ScQueryCellIterator::BinarySearch()
                 if (nTmp > 0)
                 {
                     aLastInRangeString = aCellStr;
-                    aLastInRange = aPos;
+                    nLastInRange = i;
                 }
                 else if (nTmp < 0)
                 {
                     // not strictly sorted, continue with GetThis()
-                    aLastInRange = aFirstLastInRange;
+                    nLastInRange = nFirstLastInRange;
                     bDone = true;
                 }
             }
@@ -1678,22 +1848,22 @@ bool ScQueryCellIterator::BinarySearch()
         {
             nRes = -1; // numeric < string
             if (bLessEqual)
-                aLastInRange = aPos;
+                nLastInRange = i;
         }
         else // if (bStr && !bByString)
         {
             nRes = 1; // string > numeric
             if (!bLessEqual)
-                aLastInRange = aPos;
+                nLastInRange = i;
         }
         if (nRes < 0)
         {
             if (bLessEqual)
-                nLogicalLow = nMid + 1;
+                nLo = nMid + 1;
             else // assumed to be SC_GREATER_EQUAL
             {
                 if (nMid > 0)
-                    nLogicalHigh = nMid - 1;
+                    nHi = nMid - 1;
                 else
                     bDone = true;
             }
@@ -1703,19 +1873,20 @@ bool ScQueryCellIterator::BinarySearch()
             if (bLessEqual)
             {
                 if (nMid > 0)
-                    nLogicalHigh = nMid - 1;
+                    nHi = nMid - 1;
                 else
                     bDone = true;
             }
             else // assumed to be SC_GREATER_EQUAL
-                nLogicalLow = nMid + 1;
+                nLo = nMid + 1;
         }
         else
         {
-            aLoPos = aPos;
+            nLo = i;
             bDone = bFound = true;
         }
     }
+
     if (!bFound)
     {
         // If all hits didn't result in a moving limit there's something
@@ -1726,13 +1897,14 @@ bool ScQueryCellIterator::BinarySearch()
         // Else, in case no exact match was found, we step back for a
         // subsequent GetThis() to find the last in range. Effectively this is
         // --nLo with nLastInRange == nLo-1. Both conditions combined yield:
-        aLoPos = aLastInRange;
+        nLo = nLastInRange;
     }
 
-    if (aLoPos.first != pCol->maCells.end() && toLogicalPos(aLoPos) <= static_cast<size_t>(mpParam->nRow2))
+    aCellData = aIndexer.getCell(nLo);
+    if (nLo <= nHi && aCellData.second <= mpParam->nRow2)
     {
-        nRow = toLogicalPos(aLoPos);
-        maCurPos = aLoPos;
+        nRow = aCellData.second;
+        maCurPos = aIndexer.getPosition(nLo);
         return true;
     }
     else


More information about the Libreoffice-commits mailing list