[Libreoffice-commits] core.git: Branch 'feature/perfwork' - sax/source

Matúš Kukan matus.kukan at collabora.com
Mon Sep 29 14:02:01 PDT 2014


 sax/source/tools/CachedOutputStream.hxx |  102 ++++++++++++++++++++++++++++++++
 sax/source/tools/fastserializer.cxx     |   22 ++----
 sax/source/tools/fastserializer.hxx     |   13 +---
 3 files changed, 115 insertions(+), 22 deletions(-)

New commits:
commit f736eba5a9df8645849a37b08d2764a5f495ab45
Author: Matúš Kukan <matus.kukan at collabora.com>
Date:   Mon Sep 29 16:16:05 2014 +0200

    FastSerializer: Have own fast cache buffer implementation for XOutputStream
    
    Previously comphelper::OSequenceOutputStream was used.
    
    Change-Id: I4dc38c8c62422e59efa1071312497364cdf5be3c

diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx
new file mode 100644
index 0000000..56b17fb
--- /dev/null
+++ b/sax/source/tools/CachedOutputStream.hxx
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX
+#define INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX
+
+#include <sal/types.h>
+
+#include <com/sun/star/io/XOutputStream.hpp>
+#include <com/sun/star/uno/Sequence.hxx>
+
+#include <cstring>
+#include <cstdlib>
+
+namespace sax_fastparser {
+
+class CachedOutputStream // Collects bytes in a malloc'ed cache and hands them to an XOutputStream in larger chunks
+{
+    /// Growth granularity: the cache allocation is enlarged in multiples of this many bytes
+    static const sal_Int32 mnMinimumResize = 0x1000;
+    /// Cache limit: if pending data plus a new write would exceed this many bytes, the cache is flushed to mxOutputStream first
+    static const sal_Int32 mnMaximumSize = 0x10000;
+
+    /// Output stream, usually writing data into files.
+    css::uno::Reference< css::io::XOutputStream > mxOutputStream;
+    sal_Int32 mnCacheAllocatedSize; ///< number of bytes currently allocated for mpCache
+    sal_Int32 mnCacheWrittenSize; ///< number of bytes in mpCache that have not been flushed yet
+    sal_Int8* mpCache; ///< buffer obtained from malloc/realloc and released with free() in the destructor
+
+public:
+    CachedOutputStream() : mnCacheAllocatedSize(mnMinimumResize) // initial cache is one mnMinimumResize unit
+                         , mnCacheWrittenSize(0)
+    {
+        mpCache = static_cast<sal_Int8 *>(malloc(mnCacheAllocatedSize)); // NOTE(review): result is not checked for NULL
+    }
+
+    ~CachedOutputStream() // only releases the cache; bytes still pending are not flushed here
+    {
+        free(mpCache);
+    }
+
+    css::uno::Reference< css::io::XOutputStream > getOutputStream() const // returns the stream set via setOutputStream()
+    {
+        return mxOutputStream;
+    }
+
+    void setOutputStream( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) // replaces the target stream; cached bytes are left untouched
+    {
+        mxOutputStream = xOutputStream;
+    }
+
+    /// Append nLen bytes starting at pStr to the cache; flushes to mxOutputStream first when the cache limit would be exceeded
+    void writeBytes( const sal_Int8* pStr, sal_Int32 nLen )
+    {
+        // Writer sorts some elements, so pretty big strings can
+        // accumulate in FastSaxSerializer::ForMerge.
+        // In that case, flush the pending data and write the new data directly, bypassing the cache.
+        if (nLen > mnMaximumSize)
+        {
+            flush();
+            mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+            return;
+        }
+
+        // Flush first if appending nLen bytes would push the cache past mnMaximumSize
+        if (mnCacheWrittenSize + nLen > mnMaximumSize)
+            flush();
+
+        sal_Int32 nMissingBytes = mnCacheWrittenSize + nLen - mnCacheAllocatedSize;
+        // Grow the buffer when the new data does not fit into the current allocation
+        if (nMissingBytes > 0)
+        {
+            // Enlarge by nMissingBytes rounded up to the next multiple of mnMinimumResize
+            mnCacheAllocatedSize = mnCacheAllocatedSize +
+                ((nMissingBytes + mnMinimumResize - 1) / mnMinimumResize) * mnMinimumResize;
+            mpCache = static_cast<sal_Int8 *>(realloc(mpCache, mnCacheAllocatedSize)); // NOTE(review): realloc result is not checked for NULL
+        }
+        assert(mnCacheWrittenSize + nLen <= mnCacheAllocatedSize); // NOTE(review): <cassert> is not included directly by this header - confirm it arrives transitively
+        memcpy(mpCache + mnCacheWrittenSize, pStr, nLen);
+        mnCacheWrittenSize += nLen;
+    }
+
+    /// Write the cached bytes to mxOutputStream immediately and mark the cache as empty
+    void flush()
+    {
+        mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(mpCache, mnCacheWrittenSize) ); // NOTE(review): mxOutputStream is used without a null check; called even when the cache is empty
+        // and next time write to the beginning
+        mnCacheWrittenSize = 0;
+    }
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index d6c1a94..02c97e6 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -62,9 +62,7 @@ static const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" stand
 
 namespace sax_fastparser {
     FastSaxSerializer::FastSaxSerializer( )
-        : maOutputData(0x4000)
-        , maOutputStream(maOutputData, 1.3, 0x1000, 0x4000)
-        , mxOutputStream()
+        : maCachedOutputStream()
         , mxFastTokenHandler()
         , maMarkStack()
     {
@@ -116,8 +114,7 @@ namespace sax_fastparser {
 
     void FastSaxSerializer::endDocument()
     {
-        maOutputStream.flush();
-        mxOutputStream->writeBytes(maOutputData);
+        maCachedOutputStream.flush();
     }
 
     void FastSaxSerializer::writeId( ::sal_Int32 nElement )
@@ -197,7 +194,12 @@ namespace sax_fastparser {
 
     void FastSaxSerializer::setOutputStream( const ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream >& xOutputStream )
     {
-        mxOutputStream = xOutputStream;
+        maCachedOutputStream.setOutputStream( xOutputStream );
+    }
+
+    ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > FastSaxSerializer::getOutputStream()
+    {
+        return maCachedOutputStream.getOutputStream();
     }
 
     void FastSaxSerializer::setFastTokenHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xFastTokenHandler )
@@ -294,13 +296,7 @@ namespace sax_fastparser {
 
     void FastSaxSerializer::writeOutput( const sal_Int8* pStr, size_t nLen )
     {
-        maOutputStream.writeBytes( pStr, nLen );
-        // Write when the sequence gets big enough
-        if (maOutputStream.getSize() > 0x10000)
-        {
-            maOutputStream.flush();
-            mxOutputStream->writeBytes(maOutputData);
-        }
+        maCachedOutputStream.writeBytes( pStr, nLen );
     }
 
     FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx
index 05db2c7..1b541c1 100644
--- a/sax/source/tools/fastserializer.hxx
+++ b/sax/source/tools/fastserializer.hxx
@@ -23,9 +23,9 @@
 #include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
 #include <com/sun/star/io/XOutputStream.hpp>
 
-#include <comphelper/seqstream.hxx>
 #include <sax/fastattribs.hxx>
 #include <sax/fshelper.hxx>
+#include <CachedOutputStream.hxx>
 
 #include <stack>
 #include <map>
@@ -43,7 +43,7 @@ public:
     FastSaxSerializer();
     ~FastSaxSerializer();
 
-    ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > getOutputStream() {return mxOutputStream;}
+    ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > getOutputStream();
 
     /** called by the parser when parsing of an XML stream is started.
      */
@@ -140,12 +140,8 @@ public:
     void mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType = sax_fastparser::MERGE_MARKS_APPEND );
 
 private:
-    /// Buffer written to mxOutputStream at the end, called from FSHelper destructor.
-    css::uno::Sequence< sal_Int8 > maOutputData;
-    /// Helper class to dynamically allocate memory when needed for maOutputData.
-    comphelper::OSequenceOutputStream maOutputStream;
-    /// Output stream, usually writing data into files.
-    ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > mxOutputStream;
+    /// Helper class to cache data and write in chunks to XOutputStream
+    CachedOutputStream maCachedOutputStream;
     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxFastTokenHandler;
 
     class ForMerge
@@ -207,7 +203,6 @@ private:
 #endif
 
     void writeFastAttributeList( FastAttributeList* pAttrList );
-    /// Write to maOutputData and if it's big enough flush that to mxOutputStream
     void writeOutput( const sal_Int8* pStr, size_t nLen );
     void writeOutput( const css::uno::Sequence< ::sal_Int8 >& aData );
 


More information about the Libreoffice-commits mailing list