[Libreoffice-commits] .: Branch 'feature/bplustree' - 4 commits - sw/inc sw/qa

Jan Holesovsky kendy at suse.cz
Thu Jan 31 12:21:58 PST 2013


 sw/inc/densebplustree.cxx          |   79 ++++++++++++++++++++++---------------
 sw/inc/densebplustree.hxx          |    4 -
 sw/qa/core/densebplustree-test.cxx |   27 ++++++++++++
 3 files changed, 76 insertions(+), 34 deletions(-)

New commits:
commit a16616044711b2af07284edbb6b49facccc5fa20
Author: Jan Holesovsky <kendy at suse.cz>
Date:   Thu Jan 31 21:06:42 2013 +0100

    Dense B+ tree: Don't initilize NodeWithIndex in the default constructor.
    
    It is not necessary, and valgrind suggests it takes us some time.  Indeed:
    
    BigPtrArray - append: 45 msec
    BigPtrArray - insert at front: 6580 msec
    BigPtrArray - insert in the middle: 3081 msec
    DenseBPlusTree - append: 47 msec
    DenseBPlusTree - insert at front: 167 msec
    DenseBPlusTree - insert in the middle: 87 msec
    
    I am happy now, I do not think I can do it any better.
    
    Change-Id: If7c6882daf712af37db4b43c13ab6aedb0086da0

diff --git a/sw/inc/densebplustree.hxx b/sw/inc/densebplustree.hxx
index be3252a..90cb400 100644
--- a/sw/inc/densebplustree.hxx
+++ b/sw/inc/densebplustree.hxx
@@ -96,7 +96,7 @@ private:
         DBPTreeNode< Key, Value > *pNode;
         Key nIndex;
 
-        NodeWithIndex() : pNode( NULL ), nIndex( 0 ) {}
+        NodeWithIndex() {}
         NodeWithIndex( DBPTreeNode< Key, Value > *p, Key n ) : pNode( p ), nIndex( n ) {}
     };
 
commit 2bcd2598ad45738079889752577fa1cb6f7e0c93
Author: Jan Holesovsky <kendy at suse.cz>
Date:   Thu Jan 31 20:23:20 2013 +0100

    Dense B+ tree: Use binary search when searching in the Keys inside nodes.
    
    Also started measuring one more case - inserting in the middle.  Before this
    change, it took about 100 msec.
    
    BigPtrArray - append: 45 msec
    BigPtrArray - insert at front: 6510 msec
    BigPtrArray - insert in the middle: 3043 msec
    DenseBPlusTree - append: 48 msec
    DenseBPlusTree - insert at front: 167 msec
    DenseBPlusTree - insert in the middle: 90 msec
    
    Change-Id: I2cc0a151b26931d90c8915a6ba879cf0e386b3b2

diff --git a/sw/inc/densebplustree.cxx b/sw/inc/densebplustree.cxx
index a186546..4ea3892 100644
--- a/sw/inc/densebplustree.cxx
+++ b/sw/inc/densebplustree.cxx
@@ -289,13 +289,28 @@ typename DenseBPlusTree< Key, Value >::NodeWithIndex DenseBPlusTree< Key, Value
     DBPTreeNode< Key, Value > *pNode = m_pRoot;
     rParentsLength = 0;
 
-    // recursion is nice for the alg. description, but for implementation, we
-    // want to unwind it
+    // traverse from the root to the leaves
     while ( pNode->m_bIsInternal )
     {
-        int i = 0;
-        while ( i < pNode->m_nUsed - 1 && pNode->m_pKeys[ i ] <= nPos )
-            ++i;
+        int i;
+        if ( pNode->m_nUsed < 2 || nPos < pNode->m_pKeys[ 0 ] )  // nPos too small, we continue leftmost
+            i = 0;
+        else if ( pNode->m_pKeys[ pNode->m_nUsed - 2 ] <= nPos ) // nPos is too big, continue rightmost
+            i = pNode->m_nUsed - 1;
+        else
+        {
+            // binary search, the values are ordered
+            i = 1;
+            int max = pNode->m_nUsed - 2;
+            while ( i < max )
+            {
+                int pivot = i + ( max - i ) / 2;
+                if ( pNode->m_pKeys[ pivot ] <= nPos )
+                    i = pivot + 1;
+                else
+                    max = pivot;
+            }
+        }
 
         // m_pKeys in children are relative
         if ( i > 0 )
diff --git a/sw/qa/core/densebplustree-test.cxx b/sw/qa/core/densebplustree-test.cxx
index 0ea0d8e..38e9501 100644
--- a/sw/qa/core/densebplustree-test.cxx
+++ b/sw/qa/core/densebplustree-test.cxx
@@ -70,6 +70,13 @@ int main( int, char** )
     print_time( "BigPtrArray - insert at front", tv_before, tv_after );
 
     gettimeofday( &tv_before, NULL );
+    BigPtrArray bparr3;
+    for ( int i = 0; i < 1000000; i++ )
+        bparr3.Insert( new BigPtrEntryMock(i), bparr3.Count() / 2 );
+    gettimeofday( &tv_after, NULL );
+    print_time( "BigPtrArray - insert in the middle", tv_before, tv_after );
+
+    gettimeofday( &tv_before, NULL );
     DenseBPlusTree< int, BigPtrEntryMock* > aTest;
     for ( int i = 0; i < 1000000; ++i )
         aTest.Insert( new BigPtrEntryMock(i), aTest.Count() );
@@ -82,6 +89,13 @@ int main( int, char** )
         aTest2.Insert( new BigPtrEntryMock(i), 0 );
     gettimeofday( &tv_after, NULL );
     print_time( "DenseBPlusTree - insert at front", tv_before, tv_after );
+
+    gettimeofday( &tv_before, NULL );
+    DenseBPlusTree< int, BigPtrEntryMock* > aTest3;
+    for ( int i = 0; i < 1000000; ++i )
+        aTest3.Insert( new BigPtrEntryMock(i), aTest3.Count() / 2 );
+    gettimeofday( &tv_after, NULL );
+    print_time( "DenseBPlusTree - insert in the middle", tv_before, tv_after );
 #endif
 
 #if 0
commit 40605ecc1faa02939e5e03abecdadbaea6afbe12
Author: Jan Holesovsky <kendy at suse.cz>
Date:   Thu Jan 31 18:59:22 2013 +0100

    Dense B+ tree: Avoid some unnecessary (and actually wrong) memcpy's.
    
    No performance impact - too rare operation.
    
    Change-Id: I0b7095b7b369753ed54c3a72f52f8d9cf95f26e7

diff --git a/sw/inc/densebplustree.cxx b/sw/inc/densebplustree.cxx
index cebf95d..a186546 100644
--- a/sw/inc/densebplustree.cxx
+++ b/sw/inc/densebplustree.cxx
@@ -357,31 +357,23 @@ DBPTreeNode< Key, Value >* DenseBPlusTree< Key, Value >::splitNode( DBPTreeNode<
         {
             aParent.pNode->insert( aParent.nIndex + 1, offset, pNewNode );
 
-            memcpy( pNewParents, pParents, sizeof( pParents[ 0 ] ) * nParentsLength );
+            memcpy( pNewParents, pParents, sizeof( pParents[ 0 ] ) * ( nParentsLength - 1 ) );
             rNewParentsLength = nParentsLength;
             pNewParents[ rNewParentsLength - 1 ] = aParent;
         }
         else
         {
-            NodeWithIndex pRetParents[ m_nDepth ];
-            int nRetParentsLength;
-            DBPTreeNode< Key, Value > *pNewParent = splitNode( aParent.pNode, bIsAppend, pParents, nParentsLength - 1, pRetParents, nRetParentsLength );
+            DBPTreeNode< Key, Value > *pNewParent = splitNode( aParent.pNode, bIsAppend, pParents, nParentsLength - 1, pNewParents, rNewParentsLength );
 
             if ( aParent.nIndex <= aParent.pNode->m_nUsed )
             {
                 aParent.pNode->insert( aParent.nIndex + 1, offset, pNewNode );
-
-                memcpy( pNewParents, pParents, sizeof( pParents[ 0 ] ) * nParentsLength );
-                rNewParentsLength = nParentsLength;
-                pNewParents[ rNewParentsLength - 1 ] = aParent;
+                pNewParents[ rNewParentsLength++ ] = aParent;
             }
             else
             {
                 pNewParent->insert( aParent.nIndex - aParent.pNode->m_nUsed + 1, offset, pNewNode );
-
-                memcpy( pNewParents, pParents, sizeof( pParents[ 0 ] ) * nParentsLength );
-                rNewParentsLength = nParentsLength;
-                pNewParents[ rNewParentsLength - 1 ] = NodeWithIndex( pNewParent, aParent.nIndex - aParent.pNode->m_nUsed + 1 );
+                pNewParents[ rNewParentsLength++ ] = NodeWithIndex( pNewParent, aParent.nIndex - aParent.pNode->m_nUsed + 1 );
             }
         }
     }
diff --git a/sw/qa/core/densebplustree-test.cxx b/sw/qa/core/densebplustree-test.cxx
index e270d09..0ea0d8e 100644
--- a/sw/qa/core/densebplustree-test.cxx
+++ b/sw/qa/core/densebplustree-test.cxx
@@ -52,6 +52,7 @@ void print_time( const char* msg, const struct timeval &before, const struct tim
 
 int main( int, char** )
 {
+#if 1
     struct timeval tv_before, tv_after;
 
     gettimeofday( &tv_before, NULL );
@@ -81,6 +82,18 @@ int main( int, char** )
         aTest2.Insert( new BigPtrEntryMock(i), 0 );
     gettimeofday( &tv_after, NULL );
     print_time( "DenseBPlusTree - insert at front", tv_before, tv_after );
+#endif
+
+#if 0
+    DenseBPlusTree< int, int > aNumbers;
+    aNumbers.Insert( 20, 0 );
+    aNumbers.Insert( 10, 0 );
+    aNumbers.Insert( 30, 2 );
+    aNumbers.Insert( 1000, 3 );
+    for ( int i = 0; i < 100; ++i )
+        aNumbers.Insert( i, 3 );//aNumbers.Count() );
+    aNumbers.dump();
+#endif
 
     return 0;
 }
commit 509d83a3f7ddbff57ceb11a91f5aa210d14f079a
Author: Jan Holesovsky <kendy at suse.cz>
Date:   Thu Jan 31 18:40:20 2013 +0100

    Dense B+ tree: Produce more packed trees with append()s.
    
    Normally we split the nodes half/half; but in the append() case, it is much
    more probable that the next operation will be an append() too, so let most of
    the data (children pointers or values) in the original node, and transfer just
    2 to the newly created one (so that there is still space for appends in the
    middle without having to grow the tree).
    
    BigPtrArray - append: 46 msec
    BigPtrArray - insert at front: 6661 msec
    DenseBPlusTree - append: 50 msec
    DenseBPlusTree - insert at front: 169 msec
    
    Change-Id: I7e492abad8238d40e79607e354b0bdee9bd386a4

diff --git a/sw/inc/densebplustree.cxx b/sw/inc/densebplustree.cxx
index c936ad8..cebf95d 100644
--- a/sw/inc/densebplustree.cxx
+++ b/sw/inc/densebplustree.cxx
@@ -106,38 +106,46 @@ struct DBPTreeNode
     /** Split node, and make the original one smaller.
 
         @return relative key shift of the node.
+        @param bIsAppend in case we are appending, we deliberately keep most of the data untouched, creating as empty node as possible
     */
-    int copyFromSplitNode( DBPTreeNode *pNode )
+    int copyFromSplitNode( DBPTreeNode *pNode, bool bIsAppend )
     {
+        assert( sOrder > 2 );
         assert( pNode->m_nUsed == sOrder );
 
-        const int sKeep = sOrder / 2;
+        // we optimize for the case of appending
+        // it is expected that we first create the entire structure (so want
+        // it to be dense from the space point of view), but when performing
+        // later, the distribution has to be 'fair', because the access is
+        // more or less random
+        int nHowMuchKeep = bIsAppend? sOrder - 2: sOrder / 2;
+
         int offset = 0;
 
         m_bIsInternal = pNode->m_bIsInternal;
         if ( m_bIsInternal )
         {
-            for ( int i = sKeep; i < pNode->m_nUsed; ++i )
-                m_pChildren[ i - sKeep ] = pNode->m_pChildren[ i ];
+            for ( int i = nHowMuchKeep; i < pNode->m_nUsed; ++i )
+                m_pChildren[ i - nHowMuchKeep ] = pNode->m_pChildren[ i ];
 
             // we have to 'relativize' the keys
-            offset = pNode->m_pKeys[ sKeep - 1 ];
-            for ( int i = sKeep; i < pNode->m_nUsed - 1; ++i )
-                m_pKeys[ i - sKeep ] = pNode->m_pKeys[ i ] - offset;
+            offset = pNode->m_pKeys[ nHowMuchKeep - 1 ];
+            for ( int i = nHowMuchKeep; i < pNode->m_nUsed - 1; ++i )
+                m_pKeys[ i - nHowMuchKeep ] = pNode->m_pKeys[ i ] - offset;
         }
         else
         {
-            for ( int i = sKeep; i < pNode->m_nUsed; ++i )
-                m_pValues[ i - sKeep ] = pNode->m_pValues[ i ];
+            for ( int i = nHowMuchKeep; i < pNode->m_nUsed; ++i )
+                m_pValues[ i - nHowMuchKeep ] = pNode->m_pValues[ i ];
 
-            offset = sKeep;
+            offset = nHowMuchKeep;
 
             m_pNext = pNode->m_pNext;
             pNode->m_pNext = this;
         }
 
-        m_nUsed = pNode->m_nUsed - sKeep;
-        pNode->m_nUsed = sKeep;
+        m_nUsed = pNode->m_nUsed - nHowMuchKeep;
+        pNode->m_nUsed = nHowMuchKeep;
 
         return offset;
     }
@@ -179,7 +187,7 @@ void DenseBPlusTree< Key, Value >::Insert( const Value& rValue, Key nPos )
     {
         NodeWithIndex pNewParents[ m_nDepth ];
         int nNewParentsLength;
-        DBPTreeNode< Key, Value > *pNewLeaf = splitNode( aLeaf.pNode, pParents, nParentsLength, pNewParents, nNewParentsLength );
+        DBPTreeNode< Key, Value > *pNewLeaf = splitNode( aLeaf.pNode, nPos == m_nCount, pParents, nParentsLength, pNewParents, nNewParentsLength );
 
         if ( aLeaf.nIndex <= aLeaf.pNode->m_nUsed )
             aLeaf.pNode->insert( aLeaf.nIndex, rValue );
@@ -314,12 +322,12 @@ void DenseBPlusTree< Key, Value >::shiftNodes( const NodeWithIndex pParents[], i
 }
 
 template < class Key, class Value >
-DBPTreeNode< Key, Value >* DenseBPlusTree< Key, Value >::splitNode( DBPTreeNode< Key, Value > *pNode, const NodeWithIndex pParents[], int nParentsLength, NodeWithIndex pNewParents[], int &rNewParentsLength )
+DBPTreeNode< Key, Value >* DenseBPlusTree< Key, Value >::splitNode( DBPTreeNode< Key, Value > *pNode, bool bIsAppend, const NodeWithIndex pParents[], int nParentsLength, NodeWithIndex pNewParents[], int &rNewParentsLength )
 {
     assert( pNode->m_nUsed == sOrder );
 
     DBPTreeNode< Key, Value > *pNewNode = new DBPTreeNode< Key, Value >;
-    int offset = pNewNode->copyFromSplitNode( pNode );
+    int offset = pNewNode->copyFromSplitNode( pNode, bIsAppend );
 
     // update the last leaf if necessary
     if ( pNode == m_pLastLeaf )
@@ -357,7 +365,7 @@ DBPTreeNode< Key, Value >* DenseBPlusTree< Key, Value >::splitNode( DBPTreeNode<
         {
             NodeWithIndex pRetParents[ m_nDepth ];
             int nRetParentsLength;
-            DBPTreeNode< Key, Value > *pNewParent = splitNode( aParent.pNode, pParents, nParentsLength - 1, pRetParents, nRetParentsLength );
+            DBPTreeNode< Key, Value > *pNewParent = splitNode( aParent.pNode, bIsAppend, pParents, nParentsLength - 1, pRetParents, nRetParentsLength );
 
             if ( aParent.nIndex <= aParent.pNode->m_nUsed )
             {
diff --git a/sw/inc/densebplustree.hxx b/sw/inc/densebplustree.hxx
index 027e2d8..be3252a 100644
--- a/sw/inc/densebplustree.hxx
+++ b/sw/inc/densebplustree.hxx
@@ -123,7 +123,7 @@ private:
     void shiftNodes( const NodeWithIndex pParents[], int nParentsLength, int nHowMuch );
 
     /// Split the node, and adjust parents accordingly.
-    DBPTreeNode< Key, Value >* splitNode( DBPTreeNode< Key, Value > *pNode, const NodeWithIndex pParents[], int nParentsLength, NodeWithIndex *pNewParents, int &rNewParentsLength );
+    DBPTreeNode< Key, Value >* splitNode( DBPTreeNode< Key, Value > *pNode, bool bIsAppend, const NodeWithIndex pParents[], int nParentsLength, NodeWithIndex *pNewParents, int &rNewParentsLength );
 };
 
 // include the implementation


More information about the Libreoffice-commits mailing list