[Libreoffice-commits] core.git: sax/source

Michael Stahl mstahl at redhat.com
Thu Mar 1 19:58:55 UTC 2018


 sax/source/expatwrap/saxwriter.cxx  |   30 ++++++++++++++++++++++++++++++
 sax/source/tools/fastserializer.cxx |   15 +++++++++++----
 2 files changed, 41 insertions(+), 4 deletions(-)

New commits:
commit 0a1d5af2a18d6a062c45d65689fbce619922dcc8
Author: Michael Stahl <mstahl at redhat.com>
Date:   Wed Feb 28 13:49:14 2018 +0100

    tdf#115429 sax: assert if exporting an invalid XML attribute/element
    
    Add a cheap check for this in both SaxWriter and FastSaxSerializer
    so we can find such bugs earlier, e.g. with the weekly crashtesting.
    
    Don't do a correct check but a cheap & fast one, let's ignore
    non-ASCII characters for now as the only filter with such is UOF
    and that is implemented with XSLT, not this sax code.
    
    Change-Id: I4db8f70ffb23684d4cb4211468519edd6c7c465f
    Reviewed-on: https://gerrit.libreoffice.org/50507
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Michael Stahl <mstahl at redhat.com>

diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index a9d8f280706c..b7de667bcf05 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -566,6 +566,33 @@ inline void SaxWriterHelper::startDocument()
         nCurrentPos = writeSequence();
 }
 
+// Debug-only sanity check that rName is a plausible XML element/attribute name:
+// non-empty, at most one ':', otherwise only ASCII alphanumerics, '_', '-', '.'.
+void CheckValidName(OUString const& rName)
+{
+#ifdef NDEBUG
+    (void) rName;
+#else
+    assert(!rName.isEmpty());
+    bool hasColon(false);
+    for (sal_Int32 i = 0; i < rName.getLength(); ++i)
+    {
+        auto const c(rName[i]);
+        if (c == ':')
+        {
+            assert(!hasColon && "only one colon allowed");
+            hasColon = true;
+        }
+        else if (!rtl::isAsciiAlphanumeric(c) && c != '_' && c != '-' && c != '.')
+        {   // note: this also fires for non-ASCII characters, which are
+            // allowed by XML but surely unexpected in LO filters
+            // (OTOH we don't check for invalid start chars)
+            assert(!"unexpected character in element/attribute name");
+        }
+    }
+#endif
+}
+
 inline SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rName, const Reference< XAttributeList >& xAttribs)
 {
     FinishStartElement();
@@ -581,6 +608,7 @@ inline SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rN
         nCurrentPos = writeSequence();
 
     SaxInvalidCharacterError eRet(SAX_NONE);
+    CheckValidName(rName);
     if (!writeString(rName, false, false))
         eRet = SAX_ERROR;
 
@@ -598,6 +626,7 @@ inline SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rN
         assert(DebugAttributes.find(rAttrName) == DebugAttributes.end());
         DebugAttributes.insert(rAttrName);
 #endif
+        CheckValidName(rAttrName);
         if (!writeString(rAttrName, false, false))
             eRet = SAX_ERROR;
 
@@ -658,6 +687,7 @@ inline bool SaxWriterHelper::endElement(const OUString& rName)
     if (nCurrentPos == SEQUENCESIZE)
         nCurrentPos = writeSequence();
 
+    CheckValidName(rName);
     bool bRet(writeString( rName, false, false));
 
     mp_Sequence[nCurrentPos] = '>';
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 6257853a64a8..9356f3e081d1 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -270,11 +270,18 @@ namespace sax_fastparser {
     void FastSaxSerializer::writeId( ::sal_Int32 nElement )
     {
         if( HAS_NAMESPACE( nElement ) ) {
-            writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
+            auto const Namespace(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement))); // identifier for the namespace part of nElement
+            assert(Namespace.getLength() != 0); // debug check: never write an empty namespace identifier
+            writeBytes(Namespace);
             writeBytes(sColon, N_CHARS(sColon));
-            writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
-        } else
-            writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
+            auto const Element(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement))); // identifier for the token part of nElement
+            assert(Element.getLength() != 0); // debug check: never write an empty element identifier
+            writeBytes(Element);
+        } else { // no namespace: nElement is written as a single identifier
+            auto const Element(mxFastTokenHandler->getUTF8Identifier(nElement));
+            assert(Element.getLength() != 0); // same non-empty check for the un-namespaced identifier
+            writeBytes(Element);
+        }
     }
 
 #ifdef DBG_UTIL


More information about the Libreoffice-commits mailing list