[PATCH] sot: Improve OLE2 offset-to-page computation with a FAT layout vector

Michael Meeks michael.meeks at suse.com
Fri May 11 12:19:21 PDT 2012


---
 sot/source/sdstor/stgstrms.cxx |   85 ++++++++++++++++++++++++++++++++++------
 sot/source/sdstor/stgstrms.hxx |    3 +
 2 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/sot/source/sdstor/stgstrms.cxx b/sot/source/sdstor/stgstrms.cxx
index 5e6c1ea..8d3d758 100644
--- a/sot/source/sdstor/stgstrms.cxx
+++ b/sot/source/sdstor/stgstrms.cxx
@@ -26,12 +26,12 @@
  *
  ************************************************************************/
 
+#include <algorithm>
 
 #include <string.h>     // memcpy()
-
+#include <sal/log.hxx>
 #include <osl/file.hxx>
 #include <tools/tempfile.hxx>
-#include <tools/debug.hxx>
 
 #include "sot/stg.hxx"
 #include "stgelem.hxx"
@@ -334,6 +334,37 @@ void StgStrm::SetEntry( StgDirEntry& r )
     r.SetDirty();
 }
 
+/*
+ * The page chain, is basically a singly linked list of slots each
+ * point to the next page. Instead of traversing the file structure
+ * for this each time build a simple flat in-memory vector list
+ * of pages.
+ */
+bool StgStrm::buildPageChainCache()
+{
+    if (nSize > 0)
+        m_aPagesCache.reserve(nSize/nPageSize);
+
+    sal_Int32 nBgn = nStart;
+    while (nBgn >= 0)
+    {
+        m_aPagesCache.push_back(nBgn);
+        sal_Int32 nOldBgn = nBgn;
+        nBgn = pFat->GetNextPage(nBgn);
+        if (nBgn == nOldBgn)
+            return false;
+    }
+
+    return true;
+}
+
+//See fdo#47644 for a .doc with a vast amount of pages where seeking around the
+//document takes a colossal amount of time
+//
+//There's a cost to building a page cache, so only build one if the number of
+//pages to seek through hits some sufficiently high value where it's worth it.
+#define ARBITRARY_LARGE_AMOUNT_OF_PAGES 8 * 512
+
 // Compute page number and offset for the given byte position.
 // If the position is behind the size, set the stream right
 // behind the EOF.
@@ -353,9 +384,35 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos )
     nPos = nBytePos;
     if( nOld == nNew )
         return sal_True;
+
+    if (m_aPagesCache.empty() && nNew > ARBITRARY_LARGE_AMOUNT_OF_PAGES)
+    {
+        SAL_WARN("sot", "kicking off large seek helper\n");
+        buildPageChainCache();
+    }
+
+    if (!m_aPagesCache.empty())
+    {
+        size_t nIdx = nNew / nPageSize;
+
+        // special case: seek to 1st byte of new, unallocated page
+        // (in case the file size is a multiple of the page size)
+        if( nBytePos == nSize && !nOffset && nIdx == m_aPagesCache.size() )
+        {
+            nIdx--;
+            nOffset = nPageSize;
+        }
+
+        if (nIdx < m_aPagesCache.size())
+        {
+            nPage = m_aPagesCache[ nIdx ];
+            return sal_Bool( nPage >= 0 );
+        }
+    }
+
     if( nNew > nOld )
     {
-        // the new position is behind the current, so an incremental
+        // the new position is after the current, so an incremental
         // positioning is OK. Set the page relative position
         nRel = nNew - nOld;
         nBgn = nPage;
@@ -404,6 +461,8 @@ StgPage* StgStrm::GetPhysPage( sal_Int32 nBytePos, sal_Bool bForce )
 
 sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     sal_Int32 nTo = nStart;
     sal_Int32 nPgs = ( nBytes + nPageSize - 1 ) / nPageSize;
     while( nPgs-- )
@@ -430,6 +489,8 @@ sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes )
 
 sal_Bool StgStrm::SetSize( sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     // round up to page size
     sal_Int32 nOld = ( ( nSize + nPageSize - 1 ) / nPageSize ) * nPageSize;
     sal_Int32 nNew = ( ( nBytes + nPageSize - 1 ) / nPageSize ) * nPageSize;
@@ -528,6 +589,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA
         {
             if( bMake )
             {
+                m_aPagesCache.clear();
+
                 // create a new master page
                 nFAT = nMaxPage++;
                 pMaster = rIo.Copy( nFAT, STG_FREE );
@@ -582,6 +645,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA
 
 sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage )
 {
+    m_aPagesCache.clear();
+
     sal_Bool bRes = sal_True;
     if( nOff < rIo.aHdr.GetFAT1Size() )
         rIo.aHdr.SetFATPage( nOff, nNewPage );
@@ -631,6 +696,8 @@ sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage )
 
 sal_Bool StgFATStrm::SetSize( sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     // Set the number of entries to a multiple of the page size
     short nOld = (short) ( ( nSize + ( nPageSize - 1 ) ) / nPageSize );
     short nNew = (short) (
@@ -747,16 +814,10 @@ void StgDataStrm::Init( sal_Int32 nBgn, sal_Int32 nLen )
     {
         // determine the actual size of the stream by scanning
         // the FAT chain and counting the # of pages allocated
-        nSize = 0;
-        sal_Int32 nOldBgn = -1;
-        while( nBgn >= 0 && nBgn != nOldBgn )
-        {
-            nOldBgn = nBgn;
-            nBgn = pFat->GetNextPage( nBgn );
-            if( nBgn == nOldBgn )
+        bool bOk = buildPageChainCache();
+        if (!bOk)
                 rIo.SetError( ERRCODE_IO_WRONGFORMAT );
-            nSize += nPageSize;
-        }
+        nSize = m_aPagesCache.size() * nPageSize;
     }
 }
 
diff --git a/sot/source/sdstor/stgstrms.hxx b/sot/source/sdstor/stgstrms.hxx
index a6a0ad1..6b1b8c3 100644
--- a/sot/source/sdstor/stgstrms.hxx
+++ b/sot/source/sdstor/stgstrms.hxx
@@ -30,6 +30,7 @@
 #define _STGSTRMS_HXX
 
 #include <tools/stream.hxx>
+#include <vector>
 
 class StgIo;
 class StgStrm;
@@ -77,6 +78,8 @@ protected:
     sal_Int32 nPage;                        // current logical page
     short nOffset;                      // offset into current page
     short nPageSize;                    // logical page size
+    std::vector<sal_Int32> m_aPagesCache;
+    bool buildPageChainCache();
     sal_Bool  Copy( sal_Int32 nFrom, sal_Int32 nBytes );
     StgStrm( StgIo& );
 public:
-- 
1.7.7


--=-J8VPl2UOksRWL3hIggYh--



More information about the LibreOffice mailing list