[Libreoffice-commits] core.git: l10ntools/Executable_cfgex.mk l10ntools/Executable_localize.mk l10ntools/Executable_propex.mk l10ntools/Executable_stringex.mk l10ntools/Executable_transex3.mk l10ntools/Executable_treex.mk l10ntools/Executable_uiex.mk l10ntools/Executable_ulfex.mk l10ntools/inc l10ntools/source l10ntools/StaticLibrary_transex.mk

Zolnai Tamás zolnaitamas2000 at gmail.com
Sun Mar 31 11:29:00 PDT 2013


 l10ntools/Executable_cfgex.mk      |    3 
 l10ntools/Executable_localize.mk   |    2 
 l10ntools/Executable_propex.mk     |    2 
 l10ntools/Executable_stringex.mk   |    3 
 l10ntools/Executable_transex3.mk   |    3 
 l10ntools/Executable_treex.mk      |    3 
 l10ntools/Executable_uiex.mk       |    3 
 l10ntools/Executable_ulfex.mk      |    2 
 l10ntools/StaticLibrary_transex.mk |    1 
 l10ntools/inc/helper.hxx           |    3 
 l10ntools/inc/xmlparse.hxx         |    8 -
 l10ntools/source/cfgmerge.cxx      |   32 -------
 l10ntools/source/helper.cxx        |   59 ++++++++-----
 l10ntools/source/helpmerge.cxx     |    6 -
 l10ntools/source/po.cxx            |   82 -------------------
 l10ntools/source/uimerge.cxx       |    2 
 l10ntools/source/xmlparse.cxx      |  157 +++++++++++++++++++++++++++----------
 17 files changed, 167 insertions(+), 204 deletions(-)

New commits:
commit ce51bf1a6ef36bbd1eea751add342cae6f1004d2
Author: Zolnai Tamás <zolnaitamas2000 at gmail.com>
Date:   Sun Mar 31 20:11:57 2013 +0200

    Make the transformation of help strings a bit cleaner
    
    *Do not escape tags and double quotes in tags,
     but find tags (ICU regexp) when merging and
     use this information to make strings valid.
    *Define a new Quot function for helpex,
     which works with ICU UnicodeString.
    *Move tag search to xmlparse.cxx and use ICU
     only in helpex.
    *QuotHTML no longer unescapes, it just replaces XML characters.
     (unescaping is also useless in uimerge.cxx)
    *Move UnQuotHTML() to helper.
     (it was used in xmlparse.cxx and cfgmerge.cxx)
    *Use UnQuotHTML() in uimerge.cxx too.
    
    Change-Id: Ice8940ef69279709a1c5d84c6ae1b0d62a71ca76

diff --git a/l10ntools/Executable_cfgex.mk b/l10ntools/Executable_cfgex.mk
index 7d23fa4..0083d95 100644
--- a/l10ntools/Executable_cfgex.mk
+++ b/l10ntools/Executable_cfgex.mk
@@ -49,8 +49,7 @@ $(eval $(call gb_Executable_add_exception_objects,cfgex,\
 
 $(eval $(call gb_Executable_use_externals,cfgex,\
     boost_headers \
-    icuuc \
-    icui18n \
+    libxml2 \
 ))
 
 # vim:set noet sw=4 ts=4:
diff --git a/l10ntools/Executable_localize.mk b/l10ntools/Executable_localize.mk
index 5f3cc62..b673289 100644
--- a/l10ntools/Executable_localize.mk
+++ b/l10ntools/Executable_localize.mk
@@ -45,8 +45,6 @@ $(eval $(call gb_Executable_add_exception_objects,localize,\
 
 $(eval $(call gb_Executable_use_externals,localize,\
     boost_headers \
-    icuuc \
-    icui18n \
 ))
 
 # vim:set noet sw=4 ts=4:
diff --git a/l10ntools/Executable_propex.mk b/l10ntools/Executable_propex.mk
index f98706f..c5bb06d 100644
--- a/l10ntools/Executable_propex.mk
+++ b/l10ntools/Executable_propex.mk
@@ -30,8 +30,6 @@ $(eval $(call gb_Executable_add_exception_objects,propex,\
 
 $(eval $(call gb_Executable_use_externals,propex,\
     boost_headers \
-    icuuc \
-    icui18n \
 ))
 
 # vim: set noet sw=4 ts=4:
diff --git a/l10ntools/Executable_stringex.mk b/l10ntools/Executable_stringex.mk
index 9545dd5..4f9edfd 100644
--- a/l10ntools/Executable_stringex.mk
+++ b/l10ntools/Executable_stringex.mk
@@ -31,9 +31,6 @@ $(eval $(call gb_Executable_add_exception_objects,stringex,\
 $(eval $(call gb_Executable_use_externals,stringex,\
     boost_headers \
     libxml2 \
-    icuuc \
-    icui18n \
-    icu_headers \
 ))
 
 # vim: set noet sw=4 ts=4:
diff --git a/l10ntools/Executable_transex3.mk b/l10ntools/Executable_transex3.mk
index 5178cd5..b43bc89 100644
--- a/l10ntools/Executable_transex3.mk
+++ b/l10ntools/Executable_transex3.mk
@@ -51,9 +51,6 @@ $(eval $(call gb_Executable_add_exception_objects,transex3,\
 
 $(eval $(call gb_Executable_use_externals,transex3,\
     boost_headers \
-    icuuc \
-    icui18n \
-    icu_headers \
     libxml2 \
 ))
 
diff --git a/l10ntools/Executable_treex.mk b/l10ntools/Executable_treex.mk
index c8ff48c..976dc13 100644
--- a/l10ntools/Executable_treex.mk
+++ b/l10ntools/Executable_treex.mk
@@ -31,9 +31,6 @@ $(eval $(call gb_Executable_add_exception_objects,treex,\
 $(eval $(call gb_Executable_use_externals,treex,\
     boost_headers \
     libxml2 \
-    icuuc \
-    icui18n \
-    icu_headers \
 ))
 
 # vim: set noet sw=4 ts=4:
diff --git a/l10ntools/Executable_uiex.mk b/l10ntools/Executable_uiex.mk
index 0c142c0..064b207 100644
--- a/l10ntools/Executable_uiex.mk
+++ b/l10ntools/Executable_uiex.mk
@@ -31,9 +31,6 @@ $(eval $(call gb_Executable_use_externals,uiex,\
     libxml2 \
     libxslt \
     boost_headers \
-    icuuc \
-    icui18n \
-    icu_headers \
 ))
 
 # vim: set noet sw=4 ts=4:
diff --git a/l10ntools/Executable_ulfex.mk b/l10ntools/Executable_ulfex.mk
index cd9d86e..bd7b7a5 100644
--- a/l10ntools/Executable_ulfex.mk
+++ b/l10ntools/Executable_ulfex.mk
@@ -46,8 +46,6 @@ $(eval $(call gb_Executable_add_exception_objects,ulfex,\
 
 $(eval $(call gb_Executable_use_externals,ulfex,\
     boost_headers \
-    icuuc \
-    icui18n \
 ))
 
 # vim:set noet sw=4 ts=4:
diff --git a/l10ntools/StaticLibrary_transex.mk b/l10ntools/StaticLibrary_transex.mk
index ed25591..cc65614 100644
--- a/l10ntools/StaticLibrary_transex.mk
+++ b/l10ntools/StaticLibrary_transex.mk
@@ -39,7 +39,6 @@ $(eval $(call gb_StaticLibrary_set_include,transex,\
 
 $(eval $(call gb_StaticLibrary_use_externals,transex,\
 	boost_headers \
-	icu_headers \
 	libxml2 \
 ))
 
diff --git a/l10ntools/inc/helper.hxx b/l10ntools/inc/helper.hxx
index 4dd2dd1..1292f44 100644
--- a/l10ntools/inc/helper.hxx
+++ b/l10ntools/inc/helper.hxx
@@ -25,7 +25,8 @@
 
 namespace helper {
 
-OString QuotHTML(const rtl::OString &rString);
+OString QuotHTML( const OString &rString );
+OString UnQuotHTML( const OString& rString );
 
 bool isWellFormedXML( OString const & text );
 
diff --git a/l10ntools/inc/xmlparse.hxx b/l10ntools/inc/xmlparse.hxx
index 7a75872..55867d3 100644
--- a/l10ntools/inc/xmlparse.hxx
+++ b/l10ntools/inc/xmlparse.hxx
@@ -229,11 +229,11 @@ protected:
 class XMLUtil{
 
 public:
-    /// Quot the XML characters and replace \n \t
-    static void         QuotHTML( rtl::OUString &rString );
+    /// Quot the XML characters
+    static OUString QuotHTML( const OUString& rString );
 
-    /// UnQuot the XML characters and restore \n \t
-    static void         UnQuotHTML  ( rtl::OUString &rString );
+    /// UnQuot the XML characters
+    static OUString UnQuotHTML( const OUString &rString );
 };
 
 
diff --git a/l10ntools/source/cfgmerge.cxx b/l10ntools/source/cfgmerge.cxx
index 6956200..fd9474a 100644
--- a/l10ntools/source/cfgmerge.cxx
+++ b/l10ntools/source/cfgmerge.cxx
@@ -27,6 +27,7 @@
 #include "boost/scoped_ptr.hpp"
 #include "rtl/strbuf.hxx"
 
+#include "helper.hxx"
 #include "export.hxx"
 #include "cfgmerge.hxx"
 #include "tokens.h"
@@ -136,33 +137,6 @@ static OString lcl_QuoteHTML( const OString& rString )
     return sReturn.makeStringAndClear();
 }
 
-static OString lcl_UnquoteHTML( const OString& rString )
-{
-    rtl::OStringBuffer sReturn;
-    for (sal_Int32 i = 0; i != rString.getLength();) {
-        if (rString.match("&amp;", i)) {
-            sReturn.append('&');
-            i += RTL_CONSTASCII_LENGTH("&amp;");
-        } else if (rString.match("&lt;", i)) {
-            sReturn.append('<');
-            i += RTL_CONSTASCII_LENGTH("&lt;");
-        } else if (rString.match("&gt;", i)) {
-            sReturn.append('>');
-            i += RTL_CONSTASCII_LENGTH("&gt;");
-        } else if (rString.match("&quot;", i)) {
-            sReturn.append('"');
-            i += RTL_CONSTASCII_LENGTH("&quot;");
-        } else if (rString.match("&apos;", i)) {
-            sReturn.append('\'');
-            i += RTL_CONSTASCII_LENGTH("&apos;");
-        } else {
-            sReturn.append(rString[i]);
-            ++i;
-        }
-    }
-    return sReturn.makeStringAndClear();
-}
-
 } // anonymous namespace
 
 //
@@ -489,7 +463,7 @@ void CfgExport::WorkOnResourceEnd()
                 if ( sText.isEmpty())
                     sText = sFallback;
 
-                sText = lcl_UnquoteHTML( sText );
+                sText = helper::UnQuotHTML( sText );
 
                 common::writePoEntry(
                     "Cfgex", pOutputStream, sPath, pStackData->sResTyp,
@@ -504,7 +478,7 @@ void CfgExport::WorkOnText(
     const rtl::OString &rIsoLang
 )
 {
-    if( rIsoLang.getLength() ) rText = lcl_UnquoteHTML( rText );
+    if( rIsoLang.getLength() ) rText = helper::UnQuotHTML( rText );
 }
 
 
diff --git a/l10ntools/source/helper.cxx b/l10ntools/source/helper.cxx
index cbcf6d1..08a2560 100644
--- a/l10ntools/source/helper.cxx
+++ b/l10ntools/source/helper.cxx
@@ -11,45 +11,58 @@
 
 namespace helper {
 
-rtl::OString QuotHTML(const rtl::OString &rString)
+OString QuotHTML(const OString &rString)
 {
-    rtl::OStringBuffer sReturn;
-    for (sal_Int32 i = 0; i < rString.getLength(); ++i) {
-        switch (rString[i]) {
-        case '\\':
-            if (i < rString.getLength()) {
-                switch (rString[i + 1]) {
-                case '"':
-                case '<':
-                case '>':
-                case '\\':
-                    ++i;
-                    break;
-                }
-            }
-            // fall through
-        default:
-            sReturn.append(rString[i]);
-            break;
-
+    OStringBuffer sReturn;
+    for (sal_Int32 i = 0; i < rString.getLength(); ++i)
+    {
+        switch (rString[i])
+        {
         case '<':
             sReturn.append("&lt;");
             break;
-
         case '>':
             sReturn.append("&gt;");
             break;
-
         case '"':
             sReturn.append("&quot;");
             break;
-
         case '&':
             if (rString.match("&amp;", i))
                 sReturn.append('&');
             else
                 sReturn.append("&amp;");
             break;
+        default:
+            sReturn.append(rString[i]);
+            break;
+        }
+    }
+    return sReturn.makeStringAndClear();
+}
+
+OString UnQuotHTML( const OString& rString )
+{
+    OStringBuffer sReturn;
+    for (sal_Int32 i = 0; i != rString.getLength();) {
+        if (rString.match("&amp;", i)) {
+            sReturn.append('&');
+            i += RTL_CONSTASCII_LENGTH("&amp;");
+        } else if (rString.match("&lt;", i)) {
+            sReturn.append('<');
+            i += RTL_CONSTASCII_LENGTH("&lt;");
+        } else if (rString.match("&gt;", i)) {
+            sReturn.append('>');
+            i += RTL_CONSTASCII_LENGTH("&gt;");
+        } else if (rString.match("&quot;", i)) {
+            sReturn.append('"');
+            i += RTL_CONSTASCII_LENGTH("&quot;");
+        } else if (rString.match("&apos;", i)) {
+            sReturn.append('\'');
+            i += RTL_CONSTASCII_LENGTH("&apos;");
+        } else {
+            sReturn.append(rString[i]);
+            ++i;
         }
     }
     return sReturn.makeStringAndClear();
diff --git a/l10ntools/source/helpmerge.cxx b/l10ntools/source/helpmerge.cxx
index 60ae8ed..98e8165 100644
--- a/l10ntools/source/helpmerge.cxx
+++ b/l10ntools/source/helpmerge.cxx
@@ -242,10 +242,10 @@ void HelpParser::ProcessHelp( LangHashMap* aLangHM , const rtl::OString& sCur ,
                     nPreSpaces++;
                 pEntrys->GetText( sNewText, STRING_TYP_TEXT, sCur , true );
                 OUString sNewdata;
-                if (helper::isWellFormedXML(helper::QuotHTML(sNewText)))
+                OUString sTemp = OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8);
+                if (helper::isWellFormedXML(OUStringToOString(XMLUtil::QuotHTML(sTemp),RTL_TEXTENCODING_UTF8)))
                 {
-                    sNewdata = sSourceText.copy(0,nPreSpaces) +
-                        rtl::OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8);
+                    sNewdata = sSourceText.copy(0,nPreSpaces) + sTemp;
                 }
                 else
                 {
diff --git a/l10ntools/source/po.cxx b/l10ntools/source/po.cxx
index 36e6ebb..bbfe063 100755
--- a/l10ntools/source/po.cxx
+++ b/l10ntools/source/po.cxx
@@ -17,15 +17,12 @@
 #include <string>
 
 #include <boost/crc.hpp>
-#include <unicode/regex.h>
 
 #include "po.hxx"
 
 #define POESCAPED OString("\\n\\t\\r\\\\\\\"")
 #define POUNESCAPED OString("\n\t\r\\\"")
 
-using namespace U_ICU_NAMESPACE;
-
 /** Container of po entry
 
     Provide all file operations related to LibreOffice specific
@@ -282,92 +279,17 @@ namespace
         const OString& rText,const bool bHelpText = false )
     {
         if ( bHelpText )
-            return lcl_UnEscapeText(rText,"\\<\\>\\\"\\\\","<>\"\\");
+            return rText;
         else
             return lcl_UnEscapeText(rText,"\\n\\t\\r","\n\t\r");
     }
 
-    //Find all special tag in a string using a regular expression
-    static void lcl_FindAllTag(
-        const OString& rText,std::vector<OString>& o_vFoundTags )
-    {
-
-        UErrorCode nIcuErr = U_ZERO_ERROR;
-        static const sal_uInt32 nSearchFlags =
-            UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
-        OUString sLocaleText( OStringToOUString(rText,RTL_TEXTENCODING_UTF8) );
-        static const OUString sPattern(
-            "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
-        static const UnicodeString sSearchPat(
-            reinterpret_cast<const UChar*>(sPattern.getStr()),
-            sPattern.getLength() );
-        UnicodeString sSource(
-            reinterpret_cast<const UChar*>(
-                sLocaleText.getStr()), sLocaleText.getLength() );
-
-        RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
-        aRegexMatcher.reset( sSource );
-        int64_t nStartPos = 0;
-        while( aRegexMatcher.find(nStartPos, nIcuErr) &&
-            nIcuErr == U_ZERO_ERROR )
-        {
-            UnicodeString sMatch =
-                aRegexMatcher.group(nIcuErr);
-            o_vFoundTags.push_back(
-                OUStringToOString(
-                    OUString(
-                        reinterpret_cast<const sal_Unicode*>(
-                            sMatch.getBuffer()),sMatch.length()),
-                    RTL_TEXTENCODING_UTF8));
-            nStartPos = aRegexMatcher.start(nIcuErr)+1;
-        }
-    }
-
-    //Escape special tags
-    static OString lcl_EscapeTags( const OString& rText )
-    {
-        typedef std::vector<OString> StrVec_t;
-        static const OString vInitializer[] = {
-            "ahelp", "link", "item", "emph", "defaultinline",
-            "switchinline", "caseinline", "variable",
-            "bookmark_value", "image", "embedvar", "alt" };
-        static const StrVec_t vTagsForEscape( vInitializer,
-            vInitializer + sizeof(vInitializer) / sizeof(vInitializer[0]) );
-        StrVec_t vFoundTags;
-        lcl_FindAllTag(rText,vFoundTags);
-        OString sResult = rText;
-        for(StrVec_t::const_iterator pFound  = vFoundTags.begin();
-            pFound != vFoundTags.end(); ++pFound)
-        {
-            bool bEscapeThis = false;
-            for(StrVec_t::const_iterator pEscape = vTagsForEscape.begin();
-                pEscape != vTagsForEscape.end(); ++pEscape)
-            {
-                if (pFound->startsWith("<" + *pEscape) ||
-                    *pFound == "</" + *pEscape + ">")
-                {
-                    bEscapeThis = true;
-                    break;
-                }
-            }
-            if( bEscapeThis || *pFound=="<br/>" ||
-                *pFound =="<help-id-missing/>")
-            {
-                OString sToReplace = "\\<" +
-                    pFound->copy(1,pFound->getLength()-2).
-                        replaceAll("\"","\\\"") + "\\>";
-                sResult = sResult.replaceAll(*pFound, sToReplace);
-            }
-        }
-        return sResult;
-    }
-
     //Escape to get merge string
     static OString lcl_EscapeMergeText(
         const OString& rText,const bool bHelpText = false )
     {
         if ( bHelpText )
-            return lcl_EscapeTags(rText.replaceAll("\\","\\\\"));
+            return rText;
         else
             return lcl_EscapeText(rText,"\n\t\r","\\n\\t\\r");
     }
diff --git a/l10ntools/source/uimerge.cxx b/l10ntools/source/uimerge.cxx
index 76fc9ef..7bebc61 100644
--- a/l10ntools/source/uimerge.cxx
+++ b/l10ntools/source/uimerge.cxx
@@ -64,7 +64,7 @@ int extractTranslations()
                     vIDs.push_back(helper::xmlStrToOString(content));
                     xmlFree(content);
                 }
-                OString sText = helper::xmlStrToOString(xmlNodeGetContent(nodeLevel2));
+                OString sText = helper::UnQuotHTML(helper::xmlStrToOString(xmlNodeGetContent(nodeLevel2)));
                 common::writePoEntry(
                     "Uiex", aPOStream, sInputFileName, vIDs[0],
                     (vIDs.size()>=2) ? vIDs[1] : OString(),
diff --git a/l10ntools/source/xmlparse.cxx b/l10ntools/source/xmlparse.cxx
index 8ba715d..c76d5a6 100644
--- a/l10ntools/source/xmlparse.cxx
+++ b/l10ntools/source/xmlparse.cxx
@@ -20,6 +20,7 @@
 
 #include <iterator> /* std::iterator*/
 
+#include <cassert>
 #include <stdio.h>
 #include <sal/alloca.h>
 
@@ -32,7 +33,9 @@
 #include <osl/thread.hxx>
 #include <osl/process.h>
 #include <rtl/strbuf.hxx>
+#include <unicode/regex.h>
 
+using namespace U_ICU_NAMESPACE;
 using namespace std;
 using namespace osl;
 
@@ -195,12 +198,10 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur )
                     for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) {
                         rStream << " ";
                         rtl::OUString sData( (*pElement->GetAttributeList())[ j ]->GetName() );
-                        XMLUtil::QuotHTML( sData );
-                        WriteString( rStream , sData );
+                        WriteString( rStream , XMLUtil::QuotHTML( sData ) );
                         rStream << "=\"";
                         sData = (*pElement->GetAttributeList())[ j ]->GetValue();
-                        XMLUtil::QuotHTML( sData );
-                        WriteString( rStream , sData  );
+                        WriteString( rStream , XMLUtil::QuotHTML( sData )  );
                         rStream << "\"";
                     }
                 if ( !pElement->GetChildList())
@@ -218,8 +219,7 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur )
             case XML_NODE_TYPE_DATA: {
                 XMLData *pData = ( XMLData * ) pCur;
                 rtl::OUString sData( pData->GetData());
-                XMLUtil::QuotHTML( sData );
-                WriteString( rStream, sData );
+                WriteString( rStream, XMLUtil::QuotHTML( sData ) );
             }
             break;
             case XML_NODE_TYPE_COMMENT: {
@@ -717,7 +717,7 @@ void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement
                 XMLElement *pElement = ( XMLElement * ) pCur;
 
                 if( !pElement->GetName().equalsIgnoreAsciiCase("comment") ){
-                    buffer.append( OUString("\\<") );
+                    buffer.append( OUString("<") );
                     buffer.append( pElement->GetName() );
                     if ( pElement->GetAttributeList()){
                         for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ){
@@ -727,24 +727,24 @@ void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement
                                 buffer.append( OUString(" ") );
                                 buffer.append( aAttrName );
                                 buffer.append( OUString("=") );
-                                buffer.append( OUString("\\\"") );
+                                buffer.append( OUString("\"") );
                                 buffer.append( (*pElement->GetAttributeList())[ j ]->GetValue() );
-                                buffer.append( OUString("\\\"") );
+                                buffer.append( OUString("\"") );
                             }
                         }
                     }
                     if ( !pElement->GetChildList())
-                        buffer.append( OUString("/\\>") );
+                        buffer.append( OUString("/>") );
                     else {
-                        buffer.append( OUString("\\>") );
+                        buffer.append( OUString(">") );
                         XMLChildNode* tmp=NULL;
                         for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ){
                             tmp = (*pElement->GetChildList())[ k ];
                             Print( tmp, buffer , false);
                         }
-                        buffer.append( OUString("\\</") );
+                        buffer.append( OUString("</") );
                         buffer.append( pElement->GetName() );
-                        buffer.append( OUString("\\>") );
+                        buffer.append( OUString(">") );
                     }
                 }
             }
@@ -1172,41 +1172,114 @@ XMLFile *SimpleXMLParser::Execute( const rtl::OUString &rFileName, XMLFile* pXML
     return pXMLFile;
 }
 
+namespace
+{
 
-void XMLUtil::QuotHTML( OUString &rString )
+static icu::UnicodeString lcl_QuotRange(
+    const icu::UnicodeString& rString, const sal_Int32 nStart,
+    const sal_Int32 nEnd, bool bInsideTag = false )
 {
-    const OString sString(OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
-    rString = OStringToOUString(helper::QuotHTML( sString ), RTL_TEXTENCODING_UTF8);
+    icu::UnicodeString sReturn;
+    assert( nStart > 0 && nStart < rString.length() );
+    assert( nEnd > 0 && nEnd < rString.length() );
+    for (sal_Int32 i = nStart; i <= nEnd; ++i)
+    {
+        switch (rString[i])
+        {
+            case '<':
+                sReturn.append("&lt;");
+                break;
+            case '>':
+                sReturn.append("&gt;");
+                break;
+            case '"':
+                if( !bInsideTag )
+                    sReturn.append("&quot;");
+                else
+                    sReturn.append(rString[i]);
+                break;
+            case '&':
+                if (rString.startsWith("&amp;", i, 5))
+                    sReturn.append('&');
+                else
+                    sReturn.append("&amp;");
+                break;
+            default:
+                sReturn.append(rString[i]);
+                break;
+        }
+    }
+    return sReturn;
 }
 
-void XMLUtil::UnQuotHTML( rtl::OUString &rString ){
-    rtl::OStringBuffer sReturn;
-    rtl::OString sString(rtl::OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
-    for (sal_Int32 i = 0; i != sString.getLength();) {
-        if (sString[i] == '\\') {
-            sReturn.append(RTL_CONSTASCII_STRINGPARAM("\\\\"));
-            ++i;
-        } else if (sString.match("&amp;", i)) {
-            sReturn.append('&');
-            i += RTL_CONSTASCII_LENGTH("&amp;");
-        } else if (sString.match("&lt;", i)) {
-            sReturn.append('<');
-            i += RTL_CONSTASCII_LENGTH("&lt;");
-        } else if (sString.match("&gt;", i)) {
-            sReturn.append('>');
-            i += RTL_CONSTASCII_LENGTH("&gt;");
-        } else if (sString.match("&quot;", i)) {
-            sReturn.append('"');
-            i += RTL_CONSTASCII_LENGTH("&quot;");
-        } else if (sString.match("&apos;", i)) {
-            sReturn.append('\'');
-            i += RTL_CONSTASCII_LENGTH("&apos;");
-        } else {
-            sReturn.append(sString[i]);
-            ++i;
+static bool lcl_isTag( const icu::UnicodeString& rString )
+{
+    const int nSize = 12;
+    static const icu::UnicodeString vTags[nSize] = {
+        "ahelp", "link", "item", "emph", "defaultinline",
+        "switchinline", "caseinline", "variable",
+        "bookmark_value", "image", "embedvar", "alt" };
+
+    for( int nIndex = 0; nIndex < nSize; ++nIndex )
+    {
+        if( rString.startsWith("<" + vTags[nIndex]) ||
+             rString == "</" + vTags[nIndex] + ">" )
+            return true;
+    }
+
+    return rString == "<br/>" || rString =="<help-id-missing/>";
+}
+
+} /// anonymous namespace
+
+OUString XMLUtil::QuotHTML( const OUString &rString )
+{
+    if( rString.trim().isEmpty() )
+        return rString;
+    UErrorCode nIcuErr = U_ZERO_ERROR;
+    static const sal_uInt32 nSearchFlags =
+        UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
+    static const OUString sPattern(
+        "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
+    static const UnicodeString sSearchPat(
+        reinterpret_cast<const UChar*>(sPattern.getStr()),
+        sPattern.getLength() );
+
+    icu::UnicodeString sSource(
+        reinterpret_cast<const UChar*>(
+            rString.getStr()), rString.getLength() );
+
+    RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
+    aRegexMatcher.reset( sSource );
+
+    icu::UnicodeString sReturn;
+    int32_t nEndPos = 0;
+    int32_t nStartPos = 0;
+    while( aRegexMatcher.find(nStartPos, nIcuErr) && nIcuErr == U_ZERO_ERROR )
+    {
+        nStartPos = aRegexMatcher.start(nIcuErr);
+        sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos-1));
+        nEndPos = aRegexMatcher.end(nIcuErr);
+        icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr);
+        if( lcl_isTag(sMatch) )
+        {
+            sReturn.append("<");
+            sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-2, true));
+            sReturn.append(">");
         }
+        else
+            sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos-1));
+        ++nStartPos;
     }
-    rString = rtl::OStringToOUString(sReturn.makeStringAndClear(), RTL_TEXTENCODING_UTF8);
+    sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()-1));
+    sReturn.append('\0');
+    return OUString(reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer()));
+}
+
+OUString  XMLUtil::UnQuotHTML( const OUString& rString )
+{
+    const OString sString(OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
+    return OStringToOUString(helper::UnQuotHTML(sString), RTL_TEXTENCODING_UTF8);
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */


More information about the Libreoffice-commits mailing list