[Libreoffice-commits] core.git: config_host.mk.in configure.ac i18npool/CustomTarget_breakiterator.mk include/svl svl/Library_svl.mk svl/qa svl/source vcl/source

Stephan Bergmann sbergman at redhat.com
Thu Oct 29 13:03:04 UTC 2015


 config_host.mk.in                      |    1 
 configure.ac                           |   11 -----
 i18npool/CustomTarget_breakiterator.mk |    3 -
 include/svl/urihelper.hxx              |   17 ++++++++
 svl/Library_svl.mk                     |    2 
 svl/qa/unit/test_URIHelper.cxx         |   63 ++++++++++++++++++++++++++++++
 svl/source/misc/urihelper.cxx          |   68 +++++++++++++++++++++++++++++++++
 vcl/source/gdi/pdfwriter_impl.cxx      |   11 +++--
 8 files changed, 160 insertions(+), 16 deletions(-)

New commits:
commit a346dfccd7e342d776dd59eb3ed128557e22a1bf
Author: Stephan Bergmann <sbergman at redhat.com>
Date:   Thu Oct 29 12:17:40 2015 +0100

    tdf#70833: IDNA support when exporting hyperlinks to PDF
    
    Any URLs using non-ASCII IDNA syntax need to be resolved to ASCII-only, as PDF
    URI Action's URI needs to be "encoded in 7-bit ASCII."
    
    Introduce URIHelper::resolveIdnaHost (svl/urihelper.hxx), which internally uses
    icu::IDNA.  That requires bumping the minimal --with-system-icu requirement from
    4.2 to 4.6, which means ICU_RECLASSIFIED_CLOSE_PARENTHESIS is always true now.
    
    Change-Id: I0e20d9a20ed2b869fba0cc7c969721411db590b3
    Reviewed-on: https://gerrit.libreoffice.org/19669
    Reviewed-by: Stephan Bergmann <sbergman at redhat.com>
    Tested-by: Stephan Bergmann <sbergman at redhat.com>

diff --git a/config_host.mk.in b/config_host.mk.in
index 8d54bad..5b2e648 100644
--- a/config_host.mk.in
+++ b/config_host.mk.in
@@ -273,7 +273,6 @@ export ICU_CFLAGS=$(gb_SPACE)@ICU_CFLAGS@
 export ICU_LIBS=$(gb_SPACE)@ICU_LIBS@
 export ICU_MAJOR=@ICU_MAJOR@
 export ICU_MINOR=@ICU_MINOR@
-export ICU_RECLASSIFIED_CLOSE_PARENTHESIS=@ICU_RECLASSIFIED_CLOSE_PARENTHESIS@
 export ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER=@ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER@
 export ICU_RECLASSIFIED_HEBREW_LETTER=@ICU_RECLASSIFIED_HEBREW_LETTER@
 export ICU_RECLASSIFIED_PREPEND_SET_EMPTY=@ICU_RECLASSIFIED_PREPEND_SET_EMPTY@
diff --git a/configure.ac b/configure.ac
index 50003a6..4a7f6b3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8949,7 +8949,6 @@ SYSTEM_GENCMN=
 
 ICU_MAJOR=56
 ICU_MINOR=1
-ICU_RECLASSIFIED_CLOSE_PARENTHESIS="TRUE"
 ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE"
 ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE"
 ICU_RECLASSIFIED_HEBREW_LETTER="TRUE"
@@ -8974,10 +8973,10 @@ if test "$with_system_icu" = "yes"; then
         ICU_MAJOR=`echo $ICU_VERSION | cut -d"." -f1`
         ICU_MINOR=`echo $ICU_VERSION | cut -d"." -f2`
 
-        if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "2" \); then
+        if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "6" \); then
             AC_MSG_RESULT([OK, $ICU_VERSION])
         else
-            AC_MSG_ERROR([not suitable, only >= 4.2 supported currently])
+            AC_MSG_ERROR([not suitable, only >= 4.6 supported currently])
         fi
     fi
 
@@ -9013,11 +9012,6 @@ You can use --with-system-icu-for-build=force to use it anyway.])
         if test -z "$SYSTEM_GENCMN"; then
             AC_MSG_ERROR([\'gencmn\' not found in \$PATH, install the icu development tool \'gencmn\'])
         fi
-        if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "4" \); then
-            ICU_RECLASSIFIED_CLOSE_PARENTHESIS="TRUE"
-        else
-            ICU_RECLASSIFIED_CLOSE_PARENTHESIS=
-        fi
         if test "$ICU_MAJOR" -ge "49"; then
             ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE"
             ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE"
@@ -9055,7 +9049,6 @@ AC_SUBST(SYSTEM_GENCCODE)
 AC_SUBST(SYSTEM_GENCMN)
 AC_SUBST(ICU_MAJOR)
 AC_SUBST(ICU_MINOR)
-AC_SUBST(ICU_RECLASSIFIED_CLOSE_PARENTHESIS)
 AC_SUBST(ICU_RECLASSIFIED_PREPEND_SET_EMPTY)
 AC_SUBST(ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER)
 AC_SUBST(ICU_RECLASSIFIED_HEBREW_LETTER)
diff --git a/i18npool/CustomTarget_breakiterator.mk b/i18npool/CustomTarget_breakiterator.mk
index 4aaf2e5..302ad46 100644
--- a/i18npool/CustomTarget_breakiterator.mk
+++ b/i18npool/CustomTarget_breakiterator.mk
@@ -98,8 +98,7 @@ $(i18npool_BIDIR)/%.brk : $(i18npool_BIDIR)/%.txt $(call gb_ExternalExecutable_g
 #   sed substitution...
 $(i18npool_BIDIR)/%.txt : \
 	$(SRCDIR)/i18npool/source/breakiterator/data/%.txt | $(i18npool_BIDIR)/.dir
-	sed -e ': dummy' \
-		$(if $(ICU_RECLASSIFIED_CLOSE_PARENTHESIS),-e "s#\[:LineBreak =  Close_Punctuation:\]#\[& \[:LineBreak = Close_Parenthesis:\]\]#") \
+	sed -e "s#\[:LineBreak =  Close_Punctuation:\]#\[& \[:LineBreak = Close_Parenthesis:\]\]#" \
 		$(if $(ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER),,\
 			-e '/\[:LineBreak =  Conditional_Japanese_Starter:\]/d' \
 			-e 's# $$CJ##' \
diff --git a/include/svl/urihelper.hxx b/include/svl/urihelper.hxx
index b784188..5f76330 100644
--- a/include/svl/urihelper.hxx
+++ b/include/svl/urihelper.hxx
@@ -152,6 +152,23 @@ SVL_DLLPUBLIC OUString removePassword(OUString const & rURI,
                                       INetURLObject::EncodeMechanism eEncodeMechanism = INetURLObject::WAS_ENCODED,
                                       INetURLObject::DecodeMechanism eDecodeMechanism = INetURLObject::DECODE_TO_IURI,
                                       rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
+
+/** Resolve a URL's host component domain name in IDNA syntax to plain DNS
+    syntax.
+
+    For details, see RFC 5890 "Internationalized Domain Names for Applications
+    (IDNA): Definitions and Document Framework."
+
+    @param url  An arbitrary string, should be a URI.
+
+    @return  If the input matches the syntax of a hierarchical URL, and it has
+    a host component that matches the IDNA2008 domain name syntax, and that
+    domain name contains any U-labels, return a version of the input URL with
+    the host component resolved to plain DNS syntax.  Otherwise, return the
+    input unchanged.
+*/
+SVL_DLLPUBLIC OUString resolveIdnaHost(OUString const & url);
+
 }
 
 #endif // INCLUDED_SVL_URIHELPER_HXX
diff --git a/svl/Library_svl.mk b/svl/Library_svl.mk
index db08af5..3a7b00d 100644
--- a/svl/Library_svl.mk
+++ b/svl/Library_svl.mk
@@ -21,6 +21,8 @@ $(eval $(call gb_Library_Library,svl))
 
 $(eval $(call gb_Library_use_externals,svl,\
     boost_headers \
+    icu_headers \
+    icuuc \
     mdds_headers \
     libxml2 \
 ))
diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx
index f27149b..37d53e9 100644
--- a/svl/qa/unit/test_URIHelper.cxx
+++ b/svl/qa/unit/test_URIHelper.cxx
@@ -198,9 +198,12 @@ public:
 
     void testFindFirstURLInText();
 
+    void testResolveIdnaHost();
+
     CPPUNIT_TEST_SUITE(Test);
     CPPUNIT_TEST(testNormalizedMakeRelative);
     CPPUNIT_TEST(testFindFirstURLInText);
+    CPPUNIT_TEST(testResolveIdnaHost);
     CPPUNIT_TEST(finish);
     CPPUNIT_TEST_SUITE_END();
 
@@ -423,6 +426,66 @@ void Test::testFindFirstURLInText() {
     }
 }
 
+void Test::testResolveIdnaHost() {
+    OUString input;
+    // Inputs for which no conversion is expected must come back unchanged:
+    input.clear();
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("Foo.M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://Muenchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://-M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://M\xC3\xBCnchen-.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://xn--M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://xy--M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://.M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://-bar.M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://bar-.M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://xn--bar.M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+
+    input = OUString::fromUtf8("foo://xy--bar.M\xC3\xBCnchen.de");
+    CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
+    // Hosts containing non-ASCII labels are converted to their ASCII form:
+    CPPUNIT_ASSERT_EQUAL(
+        OUString::fromUtf8("foo://M\xC3\xBCnchen@xn--mnchen-3ya.de"),
+        URIHelper::resolveIdnaHost(
+            OUString::fromUtf8("foo://M\xC3\xBCnchen@M\xC3\xBCnchen.de")));
+
+    CPPUNIT_ASSERT_EQUAL(
+        OUString::fromUtf8("foo://xn--mnchen-3ya.de."),
+        URIHelper::resolveIdnaHost(
+            OUString::fromUtf8("foo://M\xC3\xBCnchen.de.")));
+
+    CPPUNIT_ASSERT_EQUAL(
+        OUString::fromUtf8("Foo://bar@xn--mnchen-3ya.de:123/?bar#baz"),
+        URIHelper::resolveIdnaHost(
+            OUString::fromUtf8("Foo://bar@M\xC3\xBCnchen.de:123/?bar#baz")));
+
+    CPPUNIT_ASSERT_EQUAL(
+        OUString::fromUtf8("foo://xn--mnchen-3ya.de"),
+        URIHelper::resolveIdnaHost(
+            OUString::fromUtf8("foo://Mu\xCC\x88nchen.de")));
+}
+
 css::uno::Reference< css::uno::XComponentContext > Test::m_context;
 
 CPPUNIT_TEST_SUITE_REGISTRATION(Test);
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index ab47bb6..bb5678a 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -17,6 +17,10 @@
  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  */
 
+#include <sal/config.h>
+
+#include <unicode/idna.h>
+
 #include <svl/urihelper.hxx>
 #include <com/sun/star/ucb/Command.hpp>
 #include <com/sun/star/ucb/IllegalIdentifierException.hpp>
@@ -725,4 +729,68 @@ OUString URIHelper::removePassword(OUString const & rURI,
                aObj.GetURLNoPass(eDecodeMechanism, eCharset);
 }
 
+// Convert a non-ASCII (IDNA) host in url to ASCII; else return url unchanged:
+OUString URIHelper::resolveIdnaHost(OUString const & url) {
+    css::uno::Reference<css::uri::XUriReference> uri(
+        css::uri::UriReferenceFactory::create(
+            comphelper::getProcessComponentContext())
+        ->parse(url));
+    if (!(uri.is() && uri->hasAuthority())) {
+        return url;
+    }
+    auto auth(uri->getAuthority());
+    // Host is [hostStart, hostEnd): after any "userinfo@", before any ":port":
+    sal_Int32 hostStart = auth.indexOf('@') + 1;
+    sal_Int32 hostEnd = auth.getLength();
+    while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd - 1])) {
+        --hostEnd;
+    }
+    if (hostEnd > hostStart && auth[hostEnd - 1] == ':') {
+        --hostEnd;
+    } else {
+        hostEnd = auth.getLength(); // no port, host runs to end of authority
+    }
+    auto i = hostStart;
+    while (i < hostEnd && rtl::isAscii(auth[i])) {
+        ++i;
+    }
+    if (i == hostEnd) {
+        // Avoid icu::IDNA case normalization in purely non-IDNA domain names:
+        return url;
+    }
+    UErrorCode e = U_ZERO_ERROR;
+    std::unique_ptr<icu::IDNA> idna(icu::IDNA::createUTS46Instance(
+        (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ
+         | UIDNA_CHECK_CONTEXTO),
+        e));
+    if (U_FAILURE(e)) {
+        SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e);
+        return url;
+    }
+    icu::UnicodeString ascii;
+    icu::IDNAInfo info;
+    icu::UnicodeString host(
+        reinterpret_cast<UChar const *>(auth.getStr() + hostStart),
+        hostEnd - hostStart);
+    idna->nameToASCII(host, ascii, info, e);
+    if (U_FAILURE(e) || info.hasErrors()) {
+        return url;
+    }
+    // Reassemble the reference with the converted host in place of the old one:
+    OUStringBuffer buf(uri->getScheme());
+    buf.append("://").append(auth.getStr(), hostStart);
+    buf.append(
+        reinterpret_cast<sal_Unicode const *>(ascii.getBuffer()),
+        ascii.length());
+    buf.append(auth.getStr() + hostEnd, auth.getLength() - hostEnd)
+        .append(uri->getPath());
+    if (uri->hasQuery()) {
+        buf.append('?').append(uri->getQuery());
+    }
+    if (uri->hasFragment()) {
+        buf.append('#').append(uri->getFragment());
+    }
+    return buf.makeStringAndClear();
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index fcd04b4..ae00978 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -44,6 +44,7 @@
 #include <rtl/crc.h>
 #include <rtl/digest.h>
 #include <rtl/ustrbuf.hxx>
+#include <svl/urihelper.hxx>
 #include <tools/debug.hxx>
 #include <tools/fract.hxx>
 #include <tools/stream.hxx>
@@ -4495,8 +4496,10 @@ we check in the following sequence:
 // are the correct one!!
 
 // extract target file type
+            auto url(URIHelper::resolveIdnaHost(rLink.m_aURL));
+
             INetURLObject aDocumentURL( m_aContext.BaseURL );
-            INetURLObject aTargetURL( rLink.m_aURL );
+            INetURLObject aTargetURL( url );
             bool bSetGoToRMode = false;
             bool    bTargetHasPDFExtension = false;
             INetProtocol eTargetProtocol = aTargetURL.GetProtocol();
@@ -4507,7 +4510,7 @@ we check in the following sequence:
             // getting the needed URL information from the current document path
             if( eTargetProtocol == INetProtocol::NotValid )
             {
-                if( rLink.m_aURL.getLength() > 4 && rLink.m_aURL.startsWith("\\\\\\\\"))
+                if( url.getLength() > 4 && url.startsWith("\\\\\\\\"))
                 {
                     bIsUNCPath = true;
                 }
@@ -4516,7 +4519,7 @@ we check in the following sequence:
                     INetURLObject aNewBase( aDocumentURL );//duplicate document URL
                     aNewBase.removeSegment(); //remove last segment from it, obtaining the base URL of the
                                               //target document
-                    aNewBase.insertName( rLink.m_aURL );
+                    aNewBase.insertName( url );
                     aTargetURL = aNewBase;//reassign the new target URL
                     //recompute the target protocol, with the new URL
                     //normal URL processing resumes
@@ -4564,7 +4567,7 @@ we check in the following sequence:
             {
                 aLine.append( "/Launch/Win<</F" );
                 // INetURLObject is not good with UNC paths, use original path
-                appendLiteralStringEncrypt(  rLink.m_aURL, rLink.m_nObject, aLine, osl_getThreadTextEncoding() );
+                appendLiteralStringEncrypt( url, rLink.m_nObject, aLine, osl_getThreadTextEncoding() );
                 aLine.append( ">>" );
             }
             else


More information about the Libreoffice-commits mailing list