[Libreoffice-commits] core.git: include/svtools sw/qa sw/source

Miklos Vajna vmiklos at collabora.co.uk
Fri Dec 15 22:54:12 UTC 2017


 include/svtools/htmlkywd.hxx            |    4 ++++
 sw/qa/extras/htmlexport/data/hello.html |    8 ++++++++
 sw/qa/extras/htmlexport/htmlexport.cxx  |   17 +++++++++++++++++
 sw/source/filter/html/wrthtml.cxx       |   15 ++++++++++++---
 sw/source/filter/html/wrthtml.hxx       |    2 ++
 5 files changed, 43 insertions(+), 3 deletions(-)

New commits:
commit aad9c6da5154a89c6ef02214d1122d4b444eea23
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Dec 15 17:24:41 2017 +0100

    sw HTML export: add a filter option to produce XHTML
    
    Add initial support for writing XHTML markup as part of the HTML filter.
    This already emits valid XHTML for hello world documents.
    
    Time spent in ODT-load + export + close for 100 hello world inputs:
    16032 -> 9957 ms (62% of original).
    
    Change-Id: I51a0a20985958fbc817c196d3a966e55dcb3f13f
    Reviewed-on: https://gerrit.libreoffice.org/46567
    Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>
    Tested-by: Jenkins <ci at libreoffice.org>

diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx
index 4cc24949fe2d..37a77d938a51 100644
--- a/include/svtools/htmlkywd.hxx
+++ b/include/svtools/htmlkywd.hxx
@@ -24,6 +24,9 @@
 
 #define OOO_STRING_SVTOOLS_HTML_doctype40 \
     "HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\""
+#define OOO_STRING_SVTOOLS_XHTML_doctype11 \
+    "html PUBLIC \"-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN\" " \
+    "\"http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd\""
 
 // these are only switched on
 #define OOO_STRING_SVTOOLS_HTML_area "area"
@@ -507,6 +510,7 @@
 #define OOO_STRING_SVTOOLS_HTML_O_format "format"
 #define OOO_STRING_SVTOOLS_HTML_O_frame "frame"
 #define OOO_STRING_SVTOOLS_HTML_O_lang "lang"
+#define OOO_STRING_SVTOOLS_XHTML_O_lang "xml:lang"
 #define OOO_STRING_SVTOOLS_HTML_O_method "method"
 #define OOO_STRING_SVTOOLS_HTML_O_rel "rel"
 #define OOO_STRING_SVTOOLS_HTML_O_rev "rev"
diff --git a/sw/qa/extras/htmlexport/data/hello.html b/sw/qa/extras/htmlexport/data/hello.html
new file mode 100644
index 000000000000..bc4180d17bf7
--- /dev/null
+++ b/sw/qa/extras/htmlexport/data/hello.html
@@ -0,0 +1,8 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+  <head>
+    <title>Title of document</title>
+  </head>
+  <body>hello world</body>
+</html>
diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx
index 1ea03e880de7..74ffc3818685 100644
--- a/sw/qa/extras/htmlexport/htmlexport.cxx
+++ b/sw/qa/extras/htmlexport/htmlexport.cxx
@@ -48,6 +48,8 @@ private:
             setFilterOptions("SkipImages");
         else if (getTestName().indexOf("EmbedImages") != -1)
             setFilterOptions("EmbedImages");
+        else if (getTestName().indexOf("XHTML") != -1)
+            setFilterOptions("XHTML");
         else
             setFilterOptions("");
 
@@ -306,6 +308,21 @@ DECLARE_HTMLEXPORT_TEST(testEmbedImagesEnabled, "textAndImage.docx")
     assertXPath(pDoc, "/html/body/p/img", "src", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAACXBIWXMAAA3ZAAAN2QHmodeGAAAFyUlEQVR4nO1Za2gcVRQ+szuzO9ndZtfEktY2tVorVdNKxRpQweqPmh/B1w8JhCL4R4RYiPhCfKFEfwX/2VYULJIqJFDqg4KC1EaoEoyGhFJClBgbN+a9z9ndeXnOzJ3szCbbQuY2i5ADh5udmXvP953z3cdMRNM04f9sYq0B+LVNArW2TQJXM0EQdmKTQL+Mi4V2PWJwJ4CgBUkKdImi9F48HpGikTptfj4l19fLX2cyxeeQyALPeNwJxOOx/r17djz6+afdsX233QBg5CCXWYS3es62n/js4mXkdyeSmOMVjysBURQ7djVvbRv87p2oLJUAtHmLQDSUg963W8MhMSscPzXWh48e4RaT10BkKJkXens6orKYRvBZC7zlut2+231r6MOTI4exCgmswjKPmFwJFIvFu+5pqbczr7vB22QkMwd7d8vK2Hj+AD5+gUdMrgRCUnD+yt9T8ZubGsrAHSKMTHK2SDGTvGJyJaBp2g/9Z4abH7j7YMjJupvA0Mgi5BVDxUcneMXkSiCTVV/75ItLTx1qCTR2PpbwEJiYTMHjXamcUjSfMTmeIH0TwAm5H5s96JPou3N57f3n3xh99URfuL79IVmOR1UYGlXUL89phqrCabuLcAjbRvQCcjlfMwII5PCWaPib1ntv0S4O/Skn4jHhjn27FMPQ4d/ZJePjgWzJNExBDCW0gwciWp0ceLpQLHaMjM3UNTeZhcU0CLGI8FE2b75SEwJhSWx/ueuR6JvdD8Kx18+Yl/4oCN8PHEP9KyibAoC50krO76Hf/4EjnUkYHYDY+F8A93XCszhUbQiQSUHcsPQFJINz00TX5hhodCJS2WpLIAbBD
 ARAkMN+o/OYxEYeQS0y0Gz3rQTtJmQs+0ftMp8EDFsm+qLdmvhbn7cBr8o+k5Se5oOcGZ8K6E4FBHb+8ejfS0jLcYBdNn4SInBm0CuhSiIkMc7fEPxLiIDqKgMZYgQqdA86F7Brmf8KmCQhzV6BKLvWHGBy2QDzEKAtEpsgu04eoMvsNqVRY62OO6hhXbXAsieIhL60AbDLZhFA3AQa6w8xsN9hybegy+wZAkspzaAvk2OfTFhaIcds4z+SiQiEABLwrejb0ZvQ8VBvESJiDkgigbPQIjGLfoU9U1NzZCKBnW0iEkEPu+6RUWqJCAGuQ4+CXaXIxsJdbQSSdE27iyMTEjG+jVsSIrBEziFCz1IViOBOzYD6C8OgPvEwSIPDYDYkvJJK4nx+qReMvg9W+oOEo2UVEEbGAcYmrMwovgjQZEQZlRgJhwBVIs6c5EQZp2yHGDHygK7Dzz/9Bvtbj8I2/Ft5EVv34HO4PZw97w24rdFqCvcfhZIQgEJOgSd9EUDwjoQINA1/I5QrILN7QZc7RtnWMjk4zn634QGtzT04ZhoQYEDH2gYD5esBAUo5xYz7Ab5CAGx9Oxp3iDQwAtJV+jp9qlo2b7cpnPYNXri0Ygex+r53OJKQiYPReypJiDROmqQDC8khAXYVnNXIWSeFa4F3E1hKewmwQUIYt+D39dLaB9ggJRyQCBQYGZoLtKzS8uosq4Eq46wJwiGwnFmzj8rj3dizE7MJXQS7EhQ+BTaRm8CeHzSZSVZO9t0VWWVZtr5UIUDzz1jZ0XkQsBDZWdFwcOfokAV79yVJUTVirn4kLQnKc8lLoHoFqJI70BcwDj2lrZdI1cPcGrKir8oRKFeAwEcZKfJVExJXKGuiVyFwO/o0+gx6ipby9UjqmqdRlhmSVRGDpKEsF2dnpi/NdPxIVfZdTlukiECw4hYBpaQYbBzn0MifgCeqnSF3EIVViGQwfbIfSl/9CCoDA5PT1vjJ3lOw/fQ5Kw
 mmqtESav39K7rKXF/vhPb9PoBxVUaiZ2YBBtGdsxQZZfmX5AK0oFtAwc76FPbj8nLM5dMiy14aiXwLthyc5dZgm9UUjzhrGddvo4yIDtfzHbLCNv9LWWvbJFBr2yRQa/sP25LGjrtpN08AAAAASUVORK5CYII=");
 }
 
+DECLARE_HTMLEXPORT_TEST(testXHTML, "hello.html")
+{
+    OString aExpected("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML");
+    SvStream* pStream = maTempFile.GetStream(StreamMode::READ);
+    CPPUNIT_ASSERT(pStream);
+    OString aActual(read_uInt8s_ToOString(*pStream, aExpected.getLength()));
+    // This was HTML, not XHTML.
+    CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
+
+    htmlDocPtr pDoc = parseHtml(maTempFile);
+    CPPUNIT_ASSERT(pDoc);
+    // This was lang, not xml:lang.
+    assertXPath(pDoc, "/html/body", "xml:lang", "en-US");
+}
+
 CPPUNIT_PLUGIN_IMPLEMENT();
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx
index 1c1a215fd064..fb27971cf060 100644
--- a/sw/source/filter/html/wrthtml.cxx
+++ b/sw/source/filter/html/wrthtml.cxx
@@ -182,6 +182,8 @@ void SwHTMLWriter::SetupFilterOptions(SfxMedium& rMedium)
     {
         mbEmbedImages = true;
     }
+    else if (sFilterOptions == "XHTML")
+        mbXHTML = true;
 }
 
 ErrCode SwHTMLWriter::WriteStream()
@@ -931,7 +933,10 @@ const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs )
     OStringBuffer sOut;
     if (!mbSkipHeaderFooter)
     {
-        sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_HTML_doctype40);
+        if (mbXHTML)
+            sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_XHTML_doctype11);
+        else
+            sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_HTML_doctype40);
         HTMLOutFuncs::Out_AsciiTag( Strm(), sOut.makeStringAndClear().getStr() );
 
         // build prelude
@@ -1276,8 +1281,12 @@ void SwHTMLWriter::OutLanguage( LanguageType nLang )
     if( LANGUAGE_DONTKNOW != nLang )
     {
         OStringBuffer sOut;
-        sOut.append(' ').append(OOO_STRING_SVTOOLS_HTML_O_lang)
-            .append("=\"");
+        sOut.append(' ');
+        if (mbXHTML)
+            sOut.append(OOO_STRING_SVTOOLS_XHTML_O_lang);
+        else
+            sOut.append(OOO_STRING_SVTOOLS_HTML_O_lang);
+        sOut.append("=\"");
         Strm().WriteCharPtr( sOut.makeStringAndClear().getStr() );
         HTMLOutFuncs::Out_String( Strm(), LanguageTag::convertToBcp47(nLang),
                                   m_eDestEnc, &m_aNonConvertableCharacters ).WriteChar( '"' );
diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx
index bacea748f300..60171e6dfea4 100644
--- a/sw/source/filter/html/wrthtml.hxx
+++ b/sw/source/filter/html/wrthtml.hxx
@@ -387,6 +387,8 @@ public:
     /// If HTML header and footer should be written as well, or just the content itself.
     bool mbSkipHeaderFooter : 1;
     bool mbEmbedImages : 1;
+    /// If XHTML markup should be written instead of HTML.
+    bool mbXHTML = false;
 
 #define sCSS2_P_CLASS_leaders "leaders"
     bool m_bCfgPrintLayout : 1;       // PrintLayout option for TOC dot leaders


More information about the Libreoffice-commits mailing list