[Libreoffice-commits] core.git: Branch 'libreoffice-6-3' - svtools/source

Michael Stahl (via logerrit) logerrit at kemper.freedesktop.org
Tue Oct 29 14:50:11 UTC 2019


 svtools/source/svhtml/parhtml.cxx |   19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

New commits:
commit 657d77811d38e5027b8521ad52a483edc69d67c3
Author:     Michael Stahl <Michael.Stahl at cib.de>
AuthorDate: Mon Oct 28 14:31:23 2019 +0100
Commit:     Caolán McNamara <caolanm at redhat.com>
CommitDate: Tue Oct 29 15:49:21 2019 +0100

    svl: HTMLParser: stop inserting control character garbage into Writer
    
    E.g. rhbz433940-1.html contains literal ^G (BEL, U+0007) control
    characters that were previously inserted as-is into SwTextNodes.
    
    This now triggers the assert about CH_TXT_ATR_FIELDSTART in
    SwSubFont::GetTextSize_() that was added in commit
    19a559b0ec9b806519c405651d6d2b2e14712b4a.
    
    Change-Id: I6aa7de41a04069e15b40865fd57894dae0fc10db
    Reviewed-on: https://gerrit.libreoffice.org/81606
    Reviewed-by: Michael Stahl <michael.stahl at cib.de>
    Tested-by: Michael Stahl <michael.stahl at cib.de>
    (cherry picked from commit 35d248cab1f0d4800f72abb5cb6afb56f40d9083)
    Reviewed-on: https://gerrit.libreoffice.org/81652
    Tested-by: Jenkins
    Reviewed-by: Caolán McNamara <caolanm at redhat.com>
    Tested-by: Caolán McNamara <caolanm at redhat.com>

diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index ade6dfa8691b..79aee33de3a6 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -31,6 +31,7 @@
 #include <tools/datetime.hxx>
 #include <unotools/datetime.hxx>
 #include <svl/inettype.hxx>
+#include <svl/lngmisc.hxx>
 #include <com/sun/star/beans/PropertyAttribute.hpp>
 #include <com/sun/star/document/XDocumentProperties.hpp>
 
@@ -456,8 +457,12 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
                     else
                         nNextCh = 0U;
 
-                    if ( ! rtl::isUnicodeCodePoint( cChar ) )
+                    if (!rtl::isUnicodeCodePoint(cChar)
+                        || (linguistic::IsControlChar(cChar)
+                            && cChar != '\r' && cChar != '\n' && cChar != '\t'))
+                    {
                         cChar = '?';
+                    }
                 }
                 else if( rtl::isAsciiAlpha( nNextCh ) )
                 {
@@ -753,8 +758,11 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
             else
             {
                 do {
+                    if (!linguistic::IsControlChar(nNextCh))
+                    {
                     // All remaining characters make their way into the text.
-                    sTmpBuffer.appendUtf32( nNextCh );
+                        sTmpBuffer.appendUtf32( nNextCh );
+                    }
                     if( MAX_LEN == sTmpBuffer.getLength() )
                     {
                         aToken += sTmpBuffer;
@@ -989,8 +997,11 @@ HtmlTokenId HTMLParser::GetNextRawToken()
             }
             [[fallthrough]];
         default:
-            // all remaining characters are appended to the buffer
-            sTmpBuffer.appendUtf32( nNextCh );
+            if (!linguistic::IsControlChar(nNextCh) || nNextCh == '\t')
+            {
+                // all remaining characters are appended to the buffer
+                sTmpBuffer.appendUtf32( nNextCh );
+            }
             break;
         }
 


More information about the Libreoffice-commits mailing list