[Libreoffice-commits] core.git: sw/source

Stephan Bergmann sbergman at redhat.com
Fri Oct 7 21:58:23 UTC 2016


 sw/source/filter/html/htmlatr.cxx |   21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

New commits:
commit 5227e5b269be52d4ab614db5f1b3a52588b56404
Author: Stephan Bergmann <sbergman at redhat.com>
Date:   Fri Oct 7 23:54:56 2016 +0200

    Non-BMP chars are now covered by HTMLOutFuncs::Out_Char
    
    ...and use the rtl/character.hxx functions for surrogate handling
    
    Change-Id: I32002f990e4f211932a99652cdfd7992d3ea6329

diff --git a/sw/source/filter/html/htmlatr.cxx b/sw/source/filter/html/htmlatr.cxx
index a2f8463..d2aad8c 100644
--- a/sw/source/filter/html/htmlatr.cxx
+++ b/sw/source/filter/html/htmlatr.cxx
@@ -2438,16 +2438,13 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode )
 
             if( bOutChar )
             {
-                // #i120442#: get the UTF-32 codepoint by converting an eventual UTF-16 unicode surrogate pair
-                sal_uInt64 c = rStr[nStrPos];
-                if( nStrPos < nEnd - 1 )
+                sal_uInt32 c = rStr[nStrPos];
+                if( rtl::isHighSurrogate(c) && nStrPos < nEnd - 1 )
                 {
                     const sal_Unicode d = rStr[nStrPos + 1];
-                    if( (c >= 0xd800 && c <= 0xdbff) && (d >= 0xdc00 && d <= 0xdfff) )
+                    if( rtl::isLowSurrogate(d) )
                     {
-                        sal_uInt64 templow = d&0x03ff;
-                        sal_uInt64 temphi = ((c&0x03ff) + 0x0040)<<10;
-                        c = temphi|templow;
+                        c = rtl::combineSurrogates(c, d);
                         nStrPos++;
                     }
                 }
@@ -2486,14 +2483,6 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode )
                         HtmlWriter aHtml(rWrt.Strm());
                         aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak);
                     }
-                    // #i120442#: if c is outside the unicode base plane output it as "&#******;"
-                    else if( c > 0xffff)
-                    {
-                        OString sOut("&#");
-                        sOut += OString::number( (sal_uInt64)c );
-                        sOut += ";";
-                        rWrt.Strm().WriteCharPtr( sOut.getStr() );
-                    }
                     else if (c == CH_TXT_ATR_FORMELEMENT)
                     {
                         // Placeholder for a single-point fieldmark.
@@ -2503,7 +2492,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode )
                         rHTMLWrt.OutPointFieldmarks(aMarkPos);
                     }
                     else
-                        HTMLOutFuncs::Out_Char( rWrt.Strm(), (sal_Unicode)c, aContext, &rHTMLWrt.m_aNonConvertableCharacters );
+                        HTMLOutFuncs::Out_Char( rWrt.Strm(), c, aContext, &rHTMLWrt.m_aNonConvertableCharacters );
 
                     // if a paragraph's last character is a hard line break
                     // then we need to add an extra <br>


More information about the Libreoffice-commits mailing list