[Libreoffice-commits] core.git: sax/source

Eike Rathke erack at redhat.com
Tue Feb 28 21:30:17 UTC 2017


 sax/source/tools/fastserializer.cxx |   55 +++++++++++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

New commits:
commit baca2ec8d5a457512e25b499c3cacc7a66ca853f
Author: Eike Rathke <erack at redhat.com>
Date:   Tue Feb 28 22:14:08 2017 +0100

    FastSaxSerializer: SAL_WARN() when writing invalid XML characters
    
    This catches things for OOXML, that could be escaped using _xHHHH_
    
    Change-Id: I937f67dc5edd3c0e5727d74bebb736dc82bdc53d

diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 620fe68..a571829 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -101,6 +101,26 @@ namespace sax_fastparser {
         write( sOutput.getStr(), sOutput.getLength(), bEscape );
     }
 
+#if OSL_DEBUG_LEVEL > 0
+    /** Characters not allowed in XML 1.0
+        XML 1.1 would exclude only U+0000
+     */
+    bool invalidChar( char c )
+    {
+        if (static_cast<unsigned char>(c) >= 0x20)
+            return false;
+
+        switch (c)
+        {
+            case 0x09:
+            case 0x0a:
+            case 0x0d:
+                return false;
+        }
+        return true;
+    }
+#endif
+
     void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
     {
         if (nLen == -1)
@@ -112,6 +132,7 @@ namespace sax_fastparser {
             return;
         }
 
+        bool bGood = true;
         for (sal_Int32 i = 0; i < nLen; ++i)
         {
             char c = pStr[ i ];
@@ -124,9 +145,26 @@ namespace sax_fastparser {
                 case '"':   writeBytes( """, 6 );   break;
                 case '\n':  writeBytes( "
", 5 );    break;
                 case '\r':  writeBytes( "
", 5 );    break;
-                default:    writeBytes( &c, 1 );          break;
+                default:
+#if OSL_DEBUG_LEVEL > 0
+                            /* FIXME: we should escape such invalid characters
+                             * in the _xHHHH_ form OOXML uses. Note that also a
+                             * literal "_x0008_" would have to be escaped then
+                             * as _x005F_x0008_ (where only the leading '_' is
+                             * escaped as _x005F_). */
+                            if (invalidChar(pStr[i]))
+                            {
+                                bGood = false;
+                                // The SAL_WARN() for the single character is
+                                // issued in writeBytes(), just gather for the
+                                // SAL_WARN_IF() below.
+                            }
+#endif
+                            writeBytes( &c, 1 );         break;
             }
         }
+        SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
+        (void)bGood;
     }
 
     void FastSaxSerializer::endDocument()
@@ -496,6 +534,21 @@ namespace sax_fastparser {
 
     void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
     {
+#if OSL_DEBUG_LEVEL > 0
+        {
+            bool bGood = true;
+            for (size_t i=0; i < nLen; ++i)
+            {
+                if (invalidChar(pStr[i]))
+                {
+                    bGood = false;
+                    SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" <<
+                            std::hex << int(static_cast<unsigned char>(pStr[i])));
+                }
+            }
+            SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
+        }
+#endif
         maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
     }
 


More information about the Libreoffice-commits mailing list