[Libreoffice-commits] .: 2 commits - sc/qa sd/qa sot/source svtools/qa sw/qa writerfilter/qa

Michael Meeks michael at kemper.freedesktop.org
Mon May 14 01:39:51 PDT 2012


 sc/qa/unit/data/README                     |    7 +
 sd/qa/unit/data/README                     |    7 +
 sot/source/sdstor/stgstrms.cxx             |  127 +++++++++++++----------------
 sot/source/sdstor/stgstrms.hxx             |    2 
 svtools/qa/cppunit/data/README             |    7 +
 sw/qa/core/data/README                     |    7 +
 writerfilter/qa/cppunittests/rtftok/README |    7 +
 7 files changed, 94 insertions(+), 70 deletions(-)

New commits:
commit 1d32c56f36adbd0d5801f0fedec3111011ea4d65
Author: Michael Meeks <michael.meeks at suse.com>
Date:   Mon May 14 09:41:02 2012 +0100

    sot: re-work OLE2 offset-to-page computation
    
    The gotcha here is that if we get ahead of ourselves, and read to
    the end of the stream, we detect bad chains too early, so instead
    incrementally build the page chain cache, which is also quicker
    and behaves more similarly to the previous code.

diff --git a/sot/source/sdstor/stgstrms.cxx b/sot/source/sdstor/stgstrms.cxx
index f8b9776..7bef243 100644
--- a/sot/source/sdstor/stgstrms.cxx
+++ b/sot/source/sdstor/stgstrms.cxx
@@ -340,38 +340,40 @@ void StgStrm::SetEntry( StgDirEntry& r )
  * for this each time build a simple flat in-memory vector list
  * of pages.
  */
-bool StgStrm::buildPageChainCache()
+void StgStrm::scanBuildPageChainCache(sal_Int32 *pOptionalCalcSize)
 {
     if (nSize > 0)
         m_aPagesCache.reserve(nSize/nPageSize);
 
+    bool bError = false;
     sal_Int32 nBgn = nStart;
-    while (nBgn >= 0)
+    sal_Int32 nOldBgn = -1;
+    sal_Int32 nOptSize = 0;
+    while( nBgn >= 0 && nBgn != nOldBgn )
     {
-        m_aPagesCache.push_back(nBgn);
-        sal_Int32 nOldBgn = nBgn;
-        nBgn = pFat->GetNextPage(nBgn);
-        if (nBgn == nOldBgn)
-            return false;
+        if( nBgn >= 0 )
+            m_aPagesCache.push_back(nBgn);
+        nOldBgn = nBgn;
+        nBgn = pFat->GetNextPage( nBgn );
+        if( nBgn == nOldBgn )
+            bError = true;
+        nOptSize += nPageSize;
     }
-
-    return true;
+    if (bError)
+    {
+        if (pOptionalCalcSize)
+            rIo.SetError( ERRCODE_IO_WRONGFORMAT );
+        m_aPagesCache.clear();
+    }
+    if (pOptionalCalcSize)
+        *pOptionalCalcSize = nOptSize;
 }
 
-//See fdo#47644 for a .doc with a vast amount of pages where seeking around the
-//document takes a colossal amount of time
-//
-//There's a cost to building a page cache, so only build one if the number of
-//pages to seek through hits some sufficiently high value where it's worth it.
-#define ARBITRARY_LARGE_AMOUNT_OF_PAGES 8 * 512
-
 // Compute page number and offset for the given byte position.
 // If the position is behind the size, set the stream right
 // behind the EOF.
-
 sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos )
 {
-    sal_Int32 nRel, nBgn;
     // Values < 0 seek to the end
     if( nBytePos < 0 || nBytePos >= nSize )
         nBytePos = nSize;
@@ -385,69 +387,59 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos )
     if( nOld == nNew )
         return sal_True;
 
-    if (m_aPagesCache.empty() && nNew > ARBITRARY_LARGE_AMOUNT_OF_PAGES)
+    // See fdo#47644 for a .doc with a vast amount of pages where seeking around the
+    // document takes a colossal amount of time
+    //
+    // Please Note: we build the pagescache incrementally as we go if necessary,
+    // so that a corrupted FAT doesn't poison the stream state for earlier reads
+    size_t nIdx = nNew / nPageSize;
+    if( nIdx >= m_aPagesCache.size() )
     {
-        SAL_WARN("sot", "kicking off large seek helper\n");
-        buildPageChainCache();
-    }
+        // Extend the FAT cache ! ...
+        size_t nToAdd = nIdx + 1;
 
-    if (!m_aPagesCache.empty())
-    {
-        size_t nIdx = nNew / nPageSize;
+        if (m_aPagesCache.empty())
+            m_aPagesCache.push_back( nStart );
 
-        // special case: seek to 1st byte of new, unallocated page
-        // (in case the file size is a multiple of the page size)
-        if( nBytePos == nSize && !nOffset && nIdx == m_aPagesCache.size() )
-        {
-            nIdx--;
-            nOffset = nPageSize;
-        }
+        nToAdd -= m_aPagesCache.size();
+
+        sal_Int32 nBgn = m_aPagesCache.back();
 
-        if (nIdx < m_aPagesCache.size())
+        // Start adding pages while we can
+        while( nToAdd > 0 && nBgn >= 0 )
         {
-            nPage = m_aPagesCache[ nIdx ];
-            return sal_Bool( nPage >= 0 );
+            nBgn = pFat->GetNextPage( nBgn );
+            if( nBgn >= 0 )
+            {
+                m_aPagesCache.push_back( nBgn );
+                nToAdd--;
+            }
         }
     }
 
-    if( nNew > nOld )
+    if ( nIdx > m_aPagesCache.size() )
     {
-        // the new position is after the current, so an incremental
-        // positioning is OK. Set the page relative position
-        nRel = nNew - nOld;
-        nBgn = nPage;
+        rIo.SetError( SVSTREAM_FILEFORMAT_ERROR );
+        nPage = STG_EOF;
+        nOffset = nPageSize;
+        return sal_False;
     }
-    else
+    // special case: seek to 1st byte of new, unallocated page
+    // (in case the file size is a multiple of the page size)
+    if( nBytePos == nSize && !nOffset && nIdx > 0 && nIdx == m_aPagesCache.size() )
     {
-        // the new position is before the current, so we have to scan
-        // the entire chain.
-        nRel = nNew;
-        nBgn = nStart;
+        nIdx--;
+        nOffset = nPageSize;
     }
-    // now, traverse the FAT chain.
-    nRel /= nPageSize;
-
-    sal_Int32 nLast = STG_EOF;
-    while (nRel && nBgn >= 0)
+    else if ( nIdx == m_aPagesCache.size() )
     {
-        nLast = nBgn;
-        nBgn = pFat->GetNextPage( nBgn );
-        nRel--;
+        nPage = STG_EOF;
+        return sal_False;
     }
 
-    // special case: seek to 1st byte of new, unallocated page
-    // (in case the file size is a multiple of the page size)
-    if( nBytePos == nSize && nBgn == STG_EOF && !nRel && !nOffset )
-        nBgn = nLast, nOffset = nPageSize;
+    nPage = m_aPagesCache[ nIdx ];
 
-    if( nBgn < 0 && nBgn != STG_EOF )
-    {
-        rIo.SetError( SVSTREAM_FILEFORMAT_ERROR );
-        nBgn = STG_EOF;
-        nOffset = nPageSize;
-    }
-    nPage = nBgn;
-    return sal_Bool( nRel == 0 && nPage >= 0 );
+    return nPage >= 0;
 }
 
 // Retrieve the physical page for a given byte offset.
@@ -817,10 +809,7 @@ void StgDataStrm::Init( sal_Int32 nBgn, sal_Int32 nLen )
     {
         // determine the actual size of the stream by scanning
         // the FAT chain and counting the # of pages allocated
-        bool bOk = buildPageChainCache();
-        if (!bOk)
-            rIo.SetError( ERRCODE_IO_WRONGFORMAT );
-        nSize = m_aPagesCache.size() * nPageSize;
+        scanBuildPageChainCache( &nSize );
     }
 }
 
diff --git a/sot/source/sdstor/stgstrms.hxx b/sot/source/sdstor/stgstrms.hxx
index 125dc67..dff07df 100644
--- a/sot/source/sdstor/stgstrms.hxx
+++ b/sot/source/sdstor/stgstrms.hxx
@@ -79,7 +79,7 @@ protected:
     short nOffset;                      // offset into current page
     short nPageSize;                    // logical page size
     std::vector<sal_Int32> m_aPagesCache;
-    bool buildPageChainCache();
+    void scanBuildPageChainCache(sal_Int32 *pOptionalCalcSize = NULL);
     sal_Bool  Copy( sal_Int32 nFrom, sal_Int32 nBytes );
     StgStrm( StgIo& );
 public:
commit c948e655f837dc3f03b849d388ec52508c96d533
Author: Michael Meeks <michael.meeks at suse.com>
Date:   Mon May 14 09:19:36 2012 +0100

    Add helpful instructions nearby encrypted CVE files.

diff --git a/sc/qa/unit/data/README b/sc/qa/unit/data/README
new file mode 100644
index 0000000..2cc9fb3
--- /dev/null
+++ b/sc/qa/unit/data/README
@@ -0,0 +1,7 @@
+Files with the string 'CVE' in their name are encrypted to avoid
+problems with virus checkers on source code download; use:
+
+mdecrypt --bare -a arcfour -o hex -k 435645 -s 3 foo.doc # to decrypt
+mcrypt --bare -a arcfour -o hex -k 435645 -s 3   foo.doc # to create new tests
+
+to get access to the plain files for manual testing.
diff --git a/sd/qa/unit/data/README b/sd/qa/unit/data/README
new file mode 100644
index 0000000..2cc9fb3
--- /dev/null
+++ b/sd/qa/unit/data/README
@@ -0,0 +1,7 @@
+Files with the string 'CVE' in their name are encrypted to avoid
+problems with virus checkers on source code download; use:
+
+mdecrypt --bare -a arcfour -o hex -k 435645 -s 3 foo.doc # to decrypt
+mcrypt --bare -a arcfour -o hex -k 435645 -s 3   foo.doc # to create new tests
+
+to get access to the plain files for manual testing.
diff --git a/svtools/qa/cppunit/data/README b/svtools/qa/cppunit/data/README
new file mode 100644
index 0000000..2cc9fb3
--- /dev/null
+++ b/svtools/qa/cppunit/data/README
@@ -0,0 +1,7 @@
+Files with the string 'CVE' in their name are encrypted to avoid
+problems with virus checkers on source code download; use:
+
+mdecrypt --bare -a arcfour -o hex -k 435645 -s 3 foo.doc # to decrypt
+mcrypt --bare -a arcfour -o hex -k 435645 -s 3   foo.doc # to create new tests
+
+to get access to the plain files for manual testing.
diff --git a/sw/qa/core/data/README b/sw/qa/core/data/README
new file mode 100644
index 0000000..2cc9fb3
--- /dev/null
+++ b/sw/qa/core/data/README
@@ -0,0 +1,7 @@
+Files with the string 'CVE' in their name are encrypted to avoid
+problems with virus checkers on source code download; use:
+
+mdecrypt --bare -a arcfour -o hex -k 435645 -s 3 foo.doc # to decrypt
+mcrypt --bare -a arcfour -o hex -k 435645 -s 3   foo.doc # to create new tests
+
+to get access to the plain files for manual testing.
diff --git a/writerfilter/qa/cppunittests/rtftok/README b/writerfilter/qa/cppunittests/rtftok/README
new file mode 100644
index 0000000..2cc9fb3
--- /dev/null
+++ b/writerfilter/qa/cppunittests/rtftok/README
@@ -0,0 +1,7 @@
+Files with the string 'CVE' in their name are encrypted to avoid
+problems with virus checkers on source code download; use:
+
+mdecrypt --bare -a arcfour -o hex -k 435645 -s 3 foo.doc # to decrypt
+mcrypt --bare -a arcfour -o hex -k 435645 -s 3   foo.doc # to create new tests
+
+to get access to the plain files for manual testing.


More information about the Libreoffice-commits mailing list