[PATCH] sot: re-work OLE2 offset-to-page computation with a FAT chain cache

Michael Meeks michael.meeks at suse.com
Fri May 11 12:19:21 PDT 2012


---
 sot/qa/cppunit/test_sot.cxx    |   74 ++++++++++++++++++++-
 sot/source/sdstor/stgstrms.cxx |  144 +++++++++++++++++++++++++++++-----------
 sot/source/sdstor/stgstrms.hxx |    3 +
 3 files changed, 180 insertions(+), 41 deletions(-)

diff --git a/sot/qa/cppunit/test_sot.cxx b/sot/qa/cppunit/test_sot.cxx
index 36d0b02..0f91bdf 100644
--- a/sot/qa/cppunit/test_sot.cxx
+++ b/sot/qa/cppunit/test_sot.cxx
@@ -33,6 +33,7 @@
 #include <osl/file.hxx>
 #include <osl/process.h>
 #include <sot/storage.hxx>
+#include <sot/storinfo.hxx>
 
 using namespace ::com::sun::star;
 
@@ -45,6 +46,11 @@ namespace
     public:
         SotTest() {}
 
+        bool checkStream( const SotStorageRef &xObjStor,
+                          const String &rStreamName,
+                          sal_uLong nSize );
+        bool checkStorage( const SotStorageRef &xObjStor );
+
         virtual bool load(const rtl::OUString &,
             const rtl::OUString &rURL, const rtl::OUString &);
 
@@ -55,12 +61,78 @@ namespace
         CPPUNIT_TEST_SUITE_END();
     };
 
+    bool SotTest::checkStream( const SotStorageRef &xObjStor,
+                               const String &rStreamName,
+                               sal_uLong nSize )
+    {
+        unsigned char *pData = (unsigned char*)malloc( nSize );
+        sal_uLong nReadableSize = 0;
+        if( !pData )
+            return true;
+
+        {   // Read the data in one block
+            SotStorageStreamRef xStream( xObjStor->OpenSotStream( rStreamName ) );
+            xStream->Seek(0);
+            sal_uLong nRemaining = xStream->GetSize() - xStream->Tell();
+
+            CPPUNIT_ASSERT_MESSAGE( "check size", nRemaining == nSize );
+            CPPUNIT_ASSERT_MESSAGE( "check size #2", xStream->remainingSize() == nSize );
+
+            // Read as much as we can, a corrupted FAT chain can cause real grief here
+            nReadableSize = xStream->Read( (void *)pData, nSize );
+//            fprintf(stderr, "readable size %d vs size %d remaining %d\n", nReadableSize, nSize, nReadableSize);
+        }
+        {   // Read the data backwards as well
+            SotStorageStreamRef xStream( xObjStor->OpenSotStream( rStreamName ) );
+            for( sal_uLong i = nReadableSize; i > 0; i-- )
+            {
+                CPPUNIT_ASSERT_MESSAGE( "sot reading error", !xStream->GetError() );
+                unsigned char c;
+                xStream->Seek( i - 1 );
+                CPPUNIT_ASSERT_MESSAGE( "sot storage reading byte",
+                                        xStream->Read( &c, 1 ) == 1);
+                CPPUNIT_ASSERT_MESSAGE( "mismatching data storage reading byte",
+                                        pData[i - 1] == c );
+            }
+        }
+
+        return true;
+    }
+
+    bool SotTest::checkStorage( const SotStorageRef &xObjStor )
+    {
+        SvStorageInfoList aInfoList;
+        xObjStor->FillInfoList( &aInfoList );
+
+        for( SvStorageInfoList::iterator aIt = aInfoList.begin();
+             aIt != aInfoList.end(); aIt++ )
+        {
+            sal_uLong nSize = aIt->GetSize();
+            fprintf( stderr, "Stream '%s' size %ld\n",
+                     rtl::OUStringToOString( aIt->GetName(), RTL_TEXTENCODING_UTF8 ).getStr(),
+                     (long)nSize );
+            if( aIt->IsStorage() )
+            {
+                SotStorageRef xChild( xObjStor->OpenSotStorage( aIt->GetName() ) );
+                checkStorage( xChild );
+            }
+            else if( aIt->IsStream() )
+                checkStream( xObjStor, aIt->GetName(), aIt->GetSize() );
+        }
+
+        return true;
+    }
+
     bool SotTest::load(const rtl::OUString &,
         const rtl::OUString &rURL, const rtl::OUString &)
     {
         SvFileStream aStream(rURL, STREAM_READ);
         SotStorageRef xObjStor = new SotStorage(aStream);
-        return xObjStor.Is() && !xObjStor->GetError();
+        if (!xObjStor.Is() && !xObjStor->GetError())
+            return false;
+
+        CPPUNIT_ASSERT_MESSAGE("sot storage is not valid", xObjStor->Validate());
+        return checkStorage (xObjStor);
     }
 
     void SotTest::test()
diff --git a/sot/source/sdstor/stgstrms.cxx b/sot/source/sdstor/stgstrms.cxx
index 5e6c1ea..22ff35f 100644
--- a/sot/source/sdstor/stgstrms.cxx
+++ b/sot/source/sdstor/stgstrms.cxx
@@ -26,12 +26,12 @@
  *
  ************************************************************************/
 
+#include <algorithm>
 
 #include <string.h>     // memcpy()
-
+#include <sal/log.hxx>
 #include <osl/file.hxx>
 #include <tools/tempfile.hxx>
-#include <tools/debug.hxx>
 
 #include "sot/stg.hxx"
 #include "stgelem.hxx"
@@ -334,13 +334,46 @@ void StgStrm::SetEntry( StgDirEntry& r )
     r.SetDirty();
 }
 
+/*
+ * The page chain, is basically a singly linked list of slots each
+ * point to the next page. Instead of traversing the file structure
+ * for this each time build a simple flat in-memory vector list
+ * of pages.
+ */
+void StgStrm::scanBuildPageChainCache(sal_Int32 *pOptionalCalcSize)
+{
+    if (nSize > 0)
+        m_aPagesCache.reserve(nSize/nPageSize);
+
+    bool bError = false;
+    sal_Int32 nBgn = nStart;
+    sal_Int32 nOldBgn = -1;
+    sal_Int32 nOptSize = 0;
+    while( nBgn >= 0 && nBgn != nOldBgn )
+    {
+        if( nBgn >= 0 )
+            m_aPagesCache.push_back(nBgn);
+        nOldBgn = nBgn;
+        nBgn = pFat->GetNextPage( nBgn );
+        if( nBgn == nOldBgn )
+            bError = true;
+        nOptSize += nPageSize;
+    }
+    if (bError)
+    {
+        if (pOptionalCalcSize)
+            rIo.SetError( ERRCODE_IO_WRONGFORMAT );
+        m_aPagesCache.clear();
+    }
+    if (pOptionalCalcSize)
+        *pOptionalCalcSize = nOptSize;
+}
+
 // Compute page number and offset for the given byte position.
 // If the position is behind the size, set the stream right
 // behind the EOF.
-
 sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos )
 {
-    sal_Int32 nRel, nBgn;
     // Values < 0 seek to the end
     if( nBytePos < 0 || nBytePos >= nSize )
         nBytePos = nSize;
@@ -353,41 +386,71 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos )
     nPos = nBytePos;
     if( nOld == nNew )
         return sal_True;
-    if( nNew > nOld )
-    {
-        // the new position is behind the current, so an incremental
-        // positioning is OK. Set the page relative position
-        nRel = nNew - nOld;
-        nBgn = nPage;
-    }
-    else
+
+    // See fdo#47644 for a .doc with a vast amount of pages where seeking around the
+    // document takes a colossal amount of time
+    //
+    // Please Note: we build the pagescache incrementally as we go if necessary,
+    // so that a corrupted FAT doesn't poison the stream state for earlier reads
+    size_t nIdx = nNew / nPageSize;
+    if( nIdx >= m_aPagesCache.size() )
     {
-        // the new position is before the current, so we have to scan
-        // the entire chain.
-        nRel = nNew;
-        nBgn = nStart;
+        // Extend the FAT cache ! ...
+        size_t nToAdd = nIdx + 1;
+
+        if (m_aPagesCache.empty())
+            m_aPagesCache.push_back( nStart );
+
+        nToAdd -= m_aPagesCache.size();
+
+        sal_Int32 nBgn = m_aPagesCache.back();
+//        fprintf (stderr, "nIdx %d to-add %d start page %d pagesize %d\n",
+//                 (int)nIdx, (int)nToAdd, (int)nBgn, (int) nPageSize);
+
+        // Start adding pages while we can
+        while( nToAdd > 0 && nBgn >= 0 )
+        {
+            nBgn = pFat->GetNextPage( nBgn );
+//            fprintf (stderr, "add page %d\n", (int)nBgn);
+            if( nBgn >= 0 )
+            {
+                m_aPagesCache.push_back( nBgn );
+                nToAdd--;
+            }
+        }
     }
-    // now, traverse the FAT chain.
-    nRel /= nPageSize;
-    sal_Int32 nLast = STG_EOF;
-    while( nRel && nBgn >= 0 )
+
+    if ( nIdx > m_aPagesCache.size() )
     {
-        nLast = nBgn;
-        nBgn = pFat->GetNextPage( nBgn );
-        nRel--;
+        rIo.SetError( SVSTREAM_FILEFORMAT_ERROR );
+        nPage = STG_EOF;
+        nOffset = nPageSize;
+//        fprintf (stderr, "returns over-end page %d offset %d err %d ret %d\n",
+//                 (int)nPage, (int)nOffset, (int) rIo.GetError(), false);
+        return sal_False;
     }
     // special case: seek to 1st byte of new, unallocated page
     // (in case the file size is a multiple of the page size)
-    if( nBytePos == nSize && nBgn == STG_EOF && !nRel && !nOffset )
-        nBgn = nLast, nOffset = nPageSize;
-    if( nBgn < 0 && nBgn != STG_EOF )
+    if( nBytePos == nSize && !nOffset && nIdx > 0 && nIdx == m_aPagesCache.size() )
     {
-        rIo.SetError( SVSTREAM_FILEFORMAT_ERROR );
-        nBgn = STG_EOF;
+//        fprintf (stderr, "this odd case\n");
+        nIdx--;
         nOffset = nPageSize;
     }
-    nPage = nBgn;
-    return sal_Bool( nRel == 0 && nPage >= 0 );
+    else if ( nIdx == m_aPagesCache.size() )
+    {
+        nPage = STG_EOF;
+//        fprintf (stderr, "returns eof page %d offset %d err %d ret %d\n",
+//                 (int)nPage, (int)nOffset, (int) rIo.GetError(), false);
+        return sal_False;
+    }
+
+    nPage = m_aPagesCache[ nIdx ];
+
+//    fprintf (stderr, "returns page %d offset %d err %d ret %d\n",
+//             (int)nPage, (int)nOffset, (int) rIo.GetError(), nPage >= 0 );
+
+    return nPage >= 0;
 }
 
 // Retrieve the physical page for a given byte offset.
@@ -404,6 +467,8 @@ StgPage* StgStrm::GetPhysPage( sal_Int32 nBytePos, sal_Bool bForce )
 
 sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     sal_Int32 nTo = nStart;
     sal_Int32 nPgs = ( nBytes + nPageSize - 1 ) / nPageSize;
     while( nPgs-- )
@@ -430,6 +495,8 @@ sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes )
 
 sal_Bool StgStrm::SetSize( sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     // round up to page size
     sal_Int32 nOld = ( ( nSize + nPageSize - 1 ) / nPageSize ) * nPageSize;
     sal_Int32 nNew = ( ( nBytes + nPageSize - 1 ) / nPageSize ) * nPageSize;
@@ -528,6 +595,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA
         {
             if( bMake )
             {
+                m_aPagesCache.clear();
+
                 // create a new master page
                 nFAT = nMaxPage++;
                 pMaster = rIo.Copy( nFAT, STG_FREE );
@@ -582,6 +651,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA
 
 sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage )
 {
+    m_aPagesCache.clear();
+
     sal_Bool bRes = sal_True;
     if( nOff < rIo.aHdr.GetFAT1Size() )
         rIo.aHdr.SetFATPage( nOff, nNewPage );
@@ -631,6 +702,8 @@ sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage )
 
 sal_Bool StgFATStrm::SetSize( sal_Int32 nBytes )
 {
+    m_aPagesCache.clear();
+
     // Set the number of entries to a multiple of the page size
     short nOld = (short) ( ( nSize + ( nPageSize - 1 ) ) / nPageSize );
     short nNew = (short) (
@@ -747,16 +820,7 @@ void StgDataStrm::Init( sal_Int32 nBgn, sal_Int32 nLen )
     {
         // determine the actual size of the stream by scanning
         // the FAT chain and counting the # of pages allocated
-        nSize = 0;
-        sal_Int32 nOldBgn = -1;
-        while( nBgn >= 0 && nBgn != nOldBgn )
-        {
-            nOldBgn = nBgn;
-            nBgn = pFat->GetNextPage( nBgn );
-            if( nBgn == nOldBgn )
-                rIo.SetError( ERRCODE_IO_WRONGFORMAT );
-            nSize += nPageSize;
-        }
+        scanBuildPageChainCache( &nSize );
     }
 }
 
diff --git a/sot/source/sdstor/stgstrms.hxx b/sot/source/sdstor/stgstrms.hxx
index a6a0ad1..5a9558b 100644
--- a/sot/source/sdstor/stgstrms.hxx
+++ b/sot/source/sdstor/stgstrms.hxx
@@ -30,6 +30,7 @@
 #define _STGSTRMS_HXX
 
 #include <tools/stream.hxx>
+#include <vector>
 
 class StgIo;
 class StgStrm;
@@ -77,6 +78,8 @@ protected:
     sal_Int32 nPage;                        // current logical page
     short nOffset;                      // offset into current page
     short nPageSize;                    // logical page size
+    std::vector<sal_Int32> m_aPagesCache;
+    void scanBuildPageChainCache(sal_Int32 *pOptionalCalcSize = NULL);
     sal_Bool  Copy( sal_Int32 nFrom, sal_Int32 nBytes );
     StgStrm( StgIo& );
 public:
-- 
1.7.7


--=-swnRV3e+cYhrg0eqYTem--



More information about the LibreOffice mailing list