[Libreoffice-commits] core.git: Branch 'feature/perfwork' - 4 commits - include/oox oox/source sax/source
Matúš Kukan
matus.kukan at collabora.com
Mon Oct 6 03:37:23 PDT 2014
include/oox/token/tokenmap.hxx | 9 +-
oox/source/token/tokenmap.cxx | 7 -
sax/source/tools/CachedOutputStream.hxx | 46 ++++++++++-
sax/source/tools/fastserializer.cxx | 132 +++++++++++++++-----------------
sax/source/tools/fastserializer.hxx | 13 +--
5 files changed, 125 insertions(+), 82 deletions(-)
New commits:
commit d0bff8fb54cb473037b6b1819e04f679881150f0
Author: Matúš Kukan <matus.kukan at collabora.com>
Date: Mon Oct 6 10:41:48 2014 +0200
FastSerializer: Add simpler writeByte method for single characters.
Change-Id: If1637e9e72270c0dfe8afea1362cad24e7a3f1cd
diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx
index fc74118..a6a2cfc 100644
--- a/sax/source/tools/CachedOutputStream.hxx
+++ b/sax/source/tools/CachedOutputStream.hxx
@@ -100,6 +100,15 @@ public:
mnCacheWrittenSize += nLen;
}
+ void writeByte( const sal_Int8 cChar )
+ {
+ // Write when the buffer gets big enough
+ if (mnCacheWrittenSize + 1 > mnMaximumSize)
+ flush();
+
+ *(pSeq->elements + mnCacheWrittenSize++) = cChar;
+ }
+
/// immediately write buffer into mxOutputStream and clear
void flush()
{
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index ac8376b..ff089fd 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -43,14 +43,6 @@ using ::com::sun::star::io::XOutputStream;
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
-static const char sClosingBracket[] = ">";
-static const char sSlashAndClosingBracket[] = "/>";
-static const char sColon[] = ":";
-static const char sOpeningBracket[] = "<";
-static const char sOpeningBracketAndSlash[] = "</";
-static const char sQuote[] = "\"";
-static const char sEqualSignAndQuote[] = "=\"";
-static const char sSpace[] = " ";
static const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
namespace sax_fastparser {
@@ -111,10 +103,10 @@ namespace sax_fastparser {
case '"': writeBytes( "&quot;", 6 ); break;
case '\n': writeBytes( "&#10;", 5 ); break;
case '\r': writeBytes( "&#13;", 5 ); break;
- default: writeBytes( &cChar, 1 ); break;
+ default: writeByte( cChar ); break;
}
else
- writeBytes( &cChar, 1 );
+ writeByte( cChar );
}
}
@@ -146,7 +138,7 @@ namespace sax_fastparser {
case '"': writeBytes( "&quot;", 6 ); break;
case '\n': writeBytes( "&#10;", 5 ); break;
case '\r': writeBytes( "&#13;", 5 ); break;
- default: writeBytes( &c, 1 ); break;
+ default: writeByte( c ); break;
}
}
}
@@ -161,7 +153,7 @@ namespace sax_fastparser {
{
if( HAS_NAMESPACE( nElement ) ) {
writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
- writeBytes(sColon, N_CHARS(sColon));
+ writeByte(':');
writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
} else
writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
@@ -175,8 +167,7 @@ namespace sax_fastparser {
mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
Sequence<sal_Int8> const name(
mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
- return OString(reinterpret_cast<sal_Char const*>(ns.getConstArray()), ns.getLength())
- + OString(sColon, N_CHARS(sColon))
+ return OString(reinterpret_cast<sal_Char const*>(ns.getConstArray()), ns.getLength()) + ":"
+ OString(reinterpret_cast<sal_Char const*>(name.getConstArray()), name.getLength());
} else {
Sequence<sal_Int8> const name(
@@ -198,7 +189,7 @@ namespace sax_fastparser {
m_DebugStartedElements.push(Element);
#endif
- writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
+ writeByte('<');
writeId(Element);
if (pAttrList)
@@ -206,7 +197,7 @@ namespace sax_fastparser {
else
writeTokenValueList();
- writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
+ writeByte('>');
}
void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
@@ -218,11 +209,10 @@ namespace sax_fastparser {
m_DebugStartedElements.pop();
#endif
- writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
-
+ writeByte('<');
+ writeByte('/');
writeId(Element);
-
- writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
+ writeByte('>');
}
void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
@@ -233,7 +223,7 @@ namespace sax_fastparser {
maMarkStack.top()->setCurrentElement( Element );
}
- writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
+ writeByte('<');
writeId(Element);
if (pAttrList)
@@ -241,7 +231,8 @@ namespace sax_fastparser {
else
writeTokenValueList();
- writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket));
+ writeByte('/');
+ writeByte('>');
}
::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > FastSaxSerializer::getOutputStream()
@@ -256,7 +247,7 @@ namespace sax_fastparser {
#endif
for (size_t j = 0; j < maTokenValues.size(); j++)
{
- writeBytes(sSpace, N_CHARS(sSpace));
+ writeByte(' ');
sal_Int32 nToken = maTokenValues[j].nToken;
writeId(nToken);
@@ -268,11 +259,10 @@ namespace sax_fastparser {
DebugAttributes.insert(nameId);
#endif
- writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
-
+ writeByte('=');
+ writeByte('"');
write(maTokenValues[j].pValue, -1, true);
-
- writeBytes(sQuote, N_CHARS(sQuote));
+ writeByte('"');
}
maTokenValues.clear();
}
@@ -285,7 +275,7 @@ namespace sax_fastparser {
const std::vector< sal_Int32 >& Tokens = pAttrList->getFastAttributeTokens();
for (size_t j = 0; j < Tokens.size(); j++)
{
- writeBytes(sSpace, N_CHARS(sSpace));
+ writeByte(' ');
sal_Int32 nToken = Tokens[j];
writeId(nToken);
@@ -297,11 +287,10 @@ namespace sax_fastparser {
DebugAttributes.insert(nameId);
#endif
- writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
-
+ writeByte('=');
+ writeByte('"');
write(pAttrList->getFastAttributeValue(j), pAttrList->AttributeValueLength(j), true);
-
- writeBytes(sQuote, N_CHARS(sQuote));
+ writeByte('"');
}
}
@@ -373,6 +362,11 @@ namespace sax_fastparser {
maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
}
+ void FastSaxSerializer::writeByte( const sal_Int8 cChar )
+ {
+ maCachedOutputStream.writeByte( cChar );
+ }
+
FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
{
merge( maData, maPostponed, true );
diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx
index 8500b68..0b924bc 100644
--- a/sax/source/tools/fastserializer.hxx
+++ b/sax/source/tools/fastserializer.hxx
@@ -228,6 +228,7 @@ private:
*/
void writeBytes( const ::com::sun::star::uno::Sequence< ::sal_Int8 >& aData );
void writeBytes( const char* pStr, size_t nLen );
+ void writeByte( const sal_Int8 cChar );
};
} // namespace sax_fastparser
commit 65ea70a91300ff63b3e0edc740efbd076e53f6e6
Author: Matúš Kukan <matus.kukan at collabora.com>
Date: Fri Oct 3 23:12:56 2014 +0200
FastSerializer: Also use cache for writing to ForMerge if we are inside mark()
To ensure the correct order of calls to ForMerge methods,
always call flush before touching maMarkStack.
This was the missing piece in optimizing write() methods,
because of writeBytes() checking each time what to call.
E.g. for Calc documents we don't use maMarkStack at all.
So, just transfer the output to proper "ForMerge" when inside mark()
and allow optimizations.
This commit makes write() methods almost 1/3 as fast.
Change-Id: I96c13888206c81f87e29b998839f78ea9d5570af
diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx
index 8877bb7..fc74118 100644
--- a/sax/source/tools/CachedOutputStream.hxx
+++ b/sax/source/tools/CachedOutputStream.hxx
@@ -17,9 +17,17 @@
#include <cstring>
#include <cstdlib>
+#include <boost/shared_ptr.hpp>
namespace sax_fastparser {
+class ForMergeBase
+{
+public:
+ virtual ~ForMergeBase() {}
+ virtual void append( const css::uno::Sequence<sal_Int8>& rWhat ) = 0;
+};
+
class CachedOutputStream
{
/// When buffer hits this size, it's written to mxOutputStream
@@ -30,11 +38,16 @@ class CachedOutputStream
sal_Int32 mnCacheWrittenSize;
const css::uno::Sequence<sal_Int8> mpCache;
uno_Sequence *pSeq;
+ bool mbWriteToOutStream;
+ /// ForMerge structure is used for sorting elements in Writer
+ boost::shared_ptr< ForMergeBase > mpForMerge;
public:
CachedOutputStream() : mnCacheWrittenSize(0)
, mpCache(mnMaximumSize)
, pSeq(mpCache.get())
+ , mbWriteToOutStream(true)
+ , mpForMerge(NULL)
{}
~CachedOutputStream() {}
@@ -48,6 +61,20 @@ public:
mxOutputStream = xOutputStream;
}
+ void setOutput( boost::shared_ptr< ForMergeBase > pForMerge )
+ {
+ flush();
+ mbWriteToOutStream = false;
+ mpForMerge = pForMerge;
+ }
+
+ void resetOutputToStream()
+ {
+ flush();
+ mbWriteToOutStream = true;
+ mpForMerge.reset();
+ }
+
/// cache string and if limit is hit, flush
void writeBytes( const sal_Int8* pStr, sal_Int32 nLen )
{
@@ -61,7 +88,10 @@ public:
// In that case, just flush data and write immediately.
if (nLen > mnMaximumSize)
{
- mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+ if (mbWriteToOutStream)
+ mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+ else
+ mpForMerge->append( css::uno::Sequence<sal_Int8>(pStr, nLen) );
return;
}
}
@@ -75,7 +105,10 @@ public:
{
// resize the Sequence to written size
pSeq->nElements = mnCacheWrittenSize;
- mxOutputStream->writeBytes( mpCache );
+ if (mbWriteToOutStream)
+ mxOutputStream->writeBytes( mpCache );
+ else
+ mpForMerge->append( mpCache );
// and next time write to the beginning
mnCacheWrittenSize = 0;
}
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 0f05ec9..ac8376b 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -57,6 +57,7 @@ namespace sax_fastparser {
FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
: maCachedOutputStream()
, maMarkStack()
+ , mbMarkStackEmpty(true)
, mpDoubleStr(NULL)
, mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
{
@@ -152,6 +153,7 @@ namespace sax_fastparser {
void FastSaxSerializer::endDocument()
{
+ assert(mbMarkStackEmpty && maMarkStack.empty());
maCachedOutputStream.flush();
}
@@ -186,8 +188,11 @@ namespace sax_fastparser {
void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
{
- if ( !maMarkStack.empty() )
+ if ( !mbMarkStackEmpty )
+ {
+ maCachedOutputStream.flush();
maMarkStack.top()->setCurrentElement( Element );
+ }
#ifdef DBG_UTIL
m_DebugStartedElements.push(Element);
@@ -222,8 +227,11 @@ namespace sax_fastparser {
void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
{
- if ( !maMarkStack.empty() )
+ if ( !mbMarkStackEmpty )
+ {
+ maCachedOutputStream.flush();
maMarkStack.top()->setCurrentElement( Element );
+ }
writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
@@ -303,28 +311,47 @@ namespace sax_fastparser {
{
boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) );
maMarkStack.push( pSort );
+ maCachedOutputStream.setOutput( pSort );
}
else
{
boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) );
maMarkStack.push( pMerge );
+ maCachedOutputStream.setOutput( pMerge );
}
+ mbMarkStackEmpty = false;
}
void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
{
- if ( maMarkStack.empty() )
+ SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
+ if ( mbMarkStackEmpty )
return;
+ // flush, so that we get everything in getData()
+ maCachedOutputStream.flush();
+
if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE)
{
- writeOutput( maMarkStack.top()->getData() );
+ Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
maMarkStack.pop();
+ mbMarkStackEmpty = true;
+ maCachedOutputStream.resetOutputToStream();
+ maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
return;
}
const Int8Sequence aMerge( maMarkStack.top()->getData() );
maMarkStack.pop();
+ if (maMarkStack.empty())
+ {
+ mbMarkStackEmpty = true;
+ maCachedOutputStream.resetOutputToStream();
+ }
+ else
+ {
+ maCachedOutputStream.setOutput( maMarkStack.top() );
+ }
switch ( eMergeType )
{
@@ -338,26 +365,12 @@ namespace sax_fastparser {
void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
{
- writeBytes( reinterpret_cast<const char*>(rData.getConstArray()), rData.getLength() );
+ maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
}
void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
{
- if ( maMarkStack.empty() )
- writeOutput( reinterpret_cast<const sal_Int8*>(pStr), nLen );
- else
- maMarkStack.top()->append( Sequence< sal_Int8 >(
- reinterpret_cast<const sal_Int8*>(pStr), nLen) );
- }
-
- void FastSaxSerializer::writeOutput( const Sequence< ::sal_Int8 >& aData )
- {
- writeOutput( aData.getConstArray(), aData.getLength() );
- }
-
- void FastSaxSerializer::writeOutput( const sal_Int8* pStr, size_t nLen )
- {
- maCachedOutputStream.writeBytes( pStr, nLen );
+ maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
}
FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx
index 5b740ce..8500b68 100644
--- a/sax/source/tools/fastserializer.hxx
+++ b/sax/source/tools/fastserializer.hxx
@@ -148,11 +148,14 @@ public:
void mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType = sax_fastparser::MERGE_MARKS_APPEND );
private:
- /// Helper class to cache data and write in chunks to XOutputStream
+ /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append.
+ * Its flush method needs to be called before touching maMarkStack
+ * to ensure correct order of ForSort methods.
+ */
CachedOutputStream maCachedOutputStream;
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxFastTokenHandler;
- class ForMerge
+ class ForMerge : public ForMergeBase
{
Int8Sequence maData;
Int8Sequence maPostponed;
@@ -168,7 +171,7 @@ private:
#endif
virtual void prepend( const Int8Sequence &rWhat );
- virtual void append( const Int8Sequence &rWhat );
+ virtual void append( const Int8Sequence &rWhat ) SAL_OVERRIDE;
void postpone( const Int8Sequence &rWhat );
protected:
@@ -205,6 +208,7 @@ private:
};
::std::stack< boost::shared_ptr< ForMerge > > maMarkStack;
+ bool mbMarkStackEmpty;
// Would be better to use OStringBuffer instead of these two
// but then we couldn't get the rtl_String* member :-(
rtl_String *mpDoubleStr;
@@ -217,8 +221,6 @@ private:
void writeTokenValueList();
void writeFastAttributeList( FastAttributeList* pAttrList );
- void writeOutput( const sal_Int8* pStr, size_t nLen );
- void writeOutput( const css::uno::Sequence< ::sal_Int8 >& aData );
/** Forward the call to the output stream, or write to the stack.
commit 5b10c07e0d7276b4c16a2ee6b1f1986e1fb6476c
Author: Matúš Kukan <matus.kukan at collabora.com>
Date: Fri Oct 3 20:45:04 2014 +0200
inline TokenMap::getUtf8TokenName
Change-Id: Icd9c6ebc9feb3e7aba28b01729b582a8f49c832a
diff --git a/include/oox/token/tokenmap.hxx b/include/oox/token/tokenmap.hxx
index 6298773..50984e7 100644
--- a/include/oox/token/tokenmap.hxx
+++ b/include/oox/token/tokenmap.hxx
@@ -21,6 +21,7 @@
#define INCLUDED_OOX_TOKEN_TOKENMAP_HXX
#include <vector>
+#include <oox/token/tokens.hxx>
#include <rtl/instance.hxx>
#include <rtl/ustring.hxx>
#include <com/sun/star/uno/Sequence.hxx>
@@ -40,7 +41,13 @@ public:
/** Returns the UTF8 name of the passed token identifier as byte sequence. */
::com::sun::star::uno::Sequence< sal_Int8 >
- getUtf8TokenName( sal_Int32 nToken ) const;
+ getUtf8TokenName( sal_Int32 nToken ) const
+ {
+ SAL_WARN_IF(nToken < 0 || nToken >= XML_TOKEN_COUNT, "oox", "Wrong nToken parameter");
+ if (0 <= nToken && nToken < XML_TOKEN_COUNT)
+ return maTokenNames[ nToken ];
+ return css::uno::Sequence< sal_Int8 >();
+ }
/** Returns the token identifier for the passed UTF8 token name. */
sal_Int32 getTokenFromUtf8(
diff --git a/oox/source/token/tokenmap.cxx b/oox/source/token/tokenmap.cxx
index dcd7284..318aee4 100644
--- a/oox/source/token/tokenmap.cxx
+++ b/oox/source/token/tokenmap.cxx
@@ -80,13 +80,6 @@ sal_Int32 TokenMap::getTokenFromUnicode( const OUString& rUnicodeName ) const
return pToken ? pToken->nToken : XML_TOKEN_INVALID;
}
-Sequence< sal_Int8 > TokenMap::getUtf8TokenName( sal_Int32 nToken ) const
-{
- if( (0 <= nToken) && (static_cast< size_t >( nToken ) < XML_TOKEN_COUNT) )
- return maTokenNames[ static_cast< size_t >( nToken ) ];
- return Sequence< sal_Int8 >();
-}
-
sal_Int32 TokenMap::getTokenPerfectHash( const char *pStr, sal_Int32 nLength ) const
{
const struct xmltoken* pToken = Perfect_Hash::in_word_set( pStr, nLength );
commit 7d332290d64376fc454d5a20b1790646b497bcea
Author: Matúš Kukan <matus.kukan at collabora.com>
Date: Fri Oct 3 16:37:26 2014 +0200
FastSerializer: Simplify OUString write a bit more
Change-Id: Ifa0746d635ec43cdc37867cf94bcc128bb8e2aca
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index cd8b0ca..0f05ec9 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -53,15 +53,6 @@ static const char sEqualSignAndQuote[] = "=\"";
static const char sSpace[] = " ";
static const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
-static bool lcl_isAscii(const OUString& sStr)
-{
- for (sal_Int32 i = 0; i < sStr.getLength(); ++i)
- if (sStr[i] & 0xff80)
- return false;
-
- return true;
-}
-
namespace sax_fastparser {
FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
: maCachedOutputStream()
@@ -101,16 +92,16 @@ namespace sax_fastparser {
void FastSaxSerializer::write( const OUString& sOutput, bool bEscape )
{
- if (!lcl_isAscii(sOutput))
- {
- write( OUStringToOString(sOutput, RTL_TEXTENCODING_UTF8), bEscape );
- return ;
- }
-
- for (sal_Int32 i = 0; i < sOutput.getLength(); ++i)
+ const sal_Int32 nLength = sOutput.getLength();
+ for (sal_Int32 i = 0; i < nLength; ++i)
{
- char c = sOutput[ i ];
- if (bEscape) switch( c )
+ const sal_Unicode cUnicode = sOutput[ i ];
+ const char cChar = cUnicode;
+ if (cUnicode & 0xff80)
+ {
+ write( OString(&cUnicode, 1, RTL_TEXTENCODING_UTF8) );
+ }
+ else if(bEscape) switch( cChar )
{
case '<': writeBytes( "&lt;", 4 ); break;
case '>': writeBytes( "&gt;", 4 ); break;
@@ -119,10 +110,10 @@ namespace sax_fastparser {
case '"': writeBytes( "&quot;", 6 ); break;
case '\n': writeBytes( "&#10;", 5 ); break;
case '\r': writeBytes( "&#13;", 5 ); break;
- default: writeBytes( &c, 1 ); break;
+ default: writeBytes( &cChar, 1 ); break;
}
else
- writeBytes( &c, 1 );
+ writeBytes( &cChar, 1 );
}
}
More information about the Libreoffice-commits
mailing list