[Libreoffice-commits] core.git: Branch 'libreoffice-4-0' - svtools/inc svtools/source

Eike Rathke erack at redhat.com
Fri May 17 00:22:47 PDT 2013


 svtools/inc/svtools/parhtml.hxx   |    2 ++
 svtools/source/svhtml/parhtml.cxx |   22 ++++++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

New commits:
commit 2d4e570f9d6e6464cf597a1a90e84ccb3b232b5a
Author: Eike Rathke <erack at redhat.com>
Date:   Thu May 16 23:53:30 2013 +0200

    resolved fdo#56772 keep track of HTML ON/OFF tokens
    
    Regression introduced with 11cbcb8b08b540b144a5df744e9fba0b6ba8144a
    followed by 56d6589368c2e88cffec0c2e518f7c90863eeae6
    
    Deep in svtools/source/svhtml/parhtml.cxx HTMLParser::_GetNextToken()
    only an HTML_TABLEDATA_OFF was generated for <td .../> without a matching
    HTML_TABLEDATA_ON (actually the same for all <XXX/> ON/OFF tokens). This
    confuses a state machine that doesn't keep track of such unexpected
    closures and also expects all attributes of an element at an ON token.
    Only the parser knows this is actually one token, but it needs to generate
    separate ON/OFF tokens.
    
    These bugs mentioned in the original code and commits are still fixed
    with this change:
    https://bugs.freedesktop.org/show_bug.cgi?id=34666
    https://bugs.freedesktop.org/show_bug.cgi?id=36080
    https://bugs.freedesktop.org/show_bug.cgi?id=36390
    
    Change-Id: I2b3190d297a35ee3dfda95f9a4841f7c53ed4a92
    (cherry picked from commit bb7360ca9929e9b395b3c903f460c9ed5efdce4d)
    Reviewed-on: https://gerrit.libreoffice.org/3925
    Reviewed-by: Fridrich Strba <fridrich at documentfoundation.org>
    Tested-by: Fridrich Strba <fridrich at documentfoundation.org>

diff --git a/svtools/inc/svtools/parhtml.hxx b/svtools/inc/svtools/parhtml.hxx
index f8c0c61..fdfeeab 100644
--- a/svtools/inc/svtools/parhtml.hxx
+++ b/svtools/inc/svtools/parhtml.hxx
@@ -144,6 +144,8 @@ private:
 
     sal_uInt32 nPre_LinePos;            // Pos in der Line im PRE-Tag
 
+    int mnPendingOffToken;          ///< OFF token pending for a <XX.../> ON/OFF ON token
+
     String aEndToken;
 
 protected:
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index 68232e5..91bae59 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -297,7 +297,8 @@ HTMLParser::HTMLParser( SvStream& rIn, bool bReadNewDoc ) :
     bEndTokenFound(false),
     bPre_IgnoreNewPara(false),
     bReadNextChar(false),
-    bReadComment(false)
+    bReadComment(false),
+    mnPendingOffToken(0)
 {
     //#i76649, default to UTF-8 for HTML unless we know differently
     SetSrcEncoding(RTL_TEXTENCODING_UTF8);
@@ -1057,6 +1058,15 @@ int HTMLParser::_GetNextToken()
     int nRet = 0;
     sSaveToken.Erase();
 
+    if (mnPendingOffToken)
+    {
+        // HTML_<TOKEN>_OFF generated for HTML_<TOKEN>_ON
+        nRet = mnPendingOffToken;
+        mnPendingOffToken = 0;
+        aToken.Erase();
+        return nRet;
+    }
+
     // Delete options
     if (!maOptions.empty())
         maOptions.clear();
@@ -1204,10 +1214,14 @@ int HTMLParser::_GetNextToken()
                         ScanText( '>' );
 
                         // fdo#34666 fdo#36080 fdo#36390: closing "/>"?:
-                        // return HTML_<TOKEN>_OFF instead of HTML_<TOKEN>_ON
+                        // generate pending HTML_<TOKEN>_OFF for HTML_<TOKEN>_ON
+                        // Do not convert this to a single HTML_<TOKEN>_OFF
+                        // which led to fdo#56772.
                         if ((HTML_TOKEN_ONOFF & nRet) && (aToken.Len() >= 1) &&
-                            ('/' == aToken.GetChar(aToken.Len()-1))) {
-                            ++nRet; // HTML_<TOKEN>_ON -> HTML_<TOKEN>_OFF;
+                                ('/' == aToken.GetChar(aToken.Len()-1)))
+                        {
+                            mnPendingOffToken = nRet + 1;       // HTML_<TOKEN>_ON -> HTML_<TOKEN>_OFF
+                            aToken.Erase( aToken.Len()-1, 1);   // remove trailing '/'
                         }
                         if( sal_Unicode(EOF) == nNextCh && rInput.IsEof() )
                         {


More information about the Libreoffice-commits mailing list