[Libreoffice-commits] core.git: include/tools tools/source vcl/inc vcl/Library_vcl.mk vcl/source

Tomaž Vajngerl (via logerrit) logerrit at kemper.freedesktop.org
Sat Dec 28 18:47:26 UTC 2019


 include/tools/XmlWriter.hxx       |    2 
 tools/source/xml/XmlWriter.cxx    |   11 +-
 vcl/Library_vcl.mk                |    1 
 vcl/inc/pdf/XmpMetadata.hxx       |   47 +++++++++++
 vcl/source/gdi/pdfwriter_impl.cxx |  145 ++++++----------------------------
 vcl/source/pdf/XmpMetadata.cxx    |  159 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 245 insertions(+), 120 deletions(-)

New commits:
commit d016e052ddf30649ad9b729b59134ce1e90a0263
Author:     Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Thu Dec 19 20:55:16 2019 +0100
Commit:     Tomaž Vajngerl <quikee at gmail.com>
CommitDate: Sat Dec 28 19:46:50 2019 +0100

    pdf: extract XMP metadata writing and use XmlWriter
    
    Write the XMP metadata with XmlWriter instead of assembling it in
    a string buffer, and extract the XMP metadata writing into its own
    class, vcl::pdf::XmpMetadata.
    
    This also needs a change to the XmlWriter to not write a classic
    XML header: '<?xml version="1.0" ... ?>'
    
    Change-Id: I95ea0e7ba58e7c43a0c707bf9c676994210ff104
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/85908
    Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>
    Tested-by: Tomaž Vajngerl <quikee at gmail.com>

diff --git a/include/tools/XmlWriter.hxx b/include/tools/XmlWriter.hxx
index da056c68a596..7efe3a57353a 100644
--- a/include/tools/XmlWriter.hxx
+++ b/include/tools/XmlWriter.hxx
@@ -40,7 +40,7 @@ public:
 
     ~XmlWriter();
 
-    bool startDocument(sal_Int32 nIndent = 2);
+    bool startDocument(sal_Int32 nIndent = 2, bool bWriteXmlHeader = true);
     void endDocument();
 
     void startElement(const OString& sName);
diff --git a/tools/source/xml/XmlWriter.cxx b/tools/source/xml/XmlWriter.cxx
index 3400a6e9d94b..a314eed6e940 100644
--- a/tools/source/xml/XmlWriter.cxx
+++ b/tools/source/xml/XmlWriter.cxx
@@ -36,11 +36,13 @@ struct XmlWriterImpl
     XmlWriterImpl(SvStream* pStream)
         : mpStream(pStream)
         , mpWriter(nullptr)
+        , mbWriteXmlHeader(true)
     {
     }
 
     SvStream* const mpStream;
     xmlTextWriterPtr mpWriter;
+    bool mbWriteXmlHeader;
 };
 
 XmlWriter::XmlWriter(SvStream* pStream)
@@ -54,21 +56,24 @@ XmlWriter::~XmlWriter()
         endDocument();
 }
 
-bool XmlWriter::startDocument(sal_Int32 nIndent)
+bool XmlWriter::startDocument(sal_Int32 nIndent, bool bWriteXmlHeader)
 {
+    mpImpl->mbWriteXmlHeader = bWriteXmlHeader;
     xmlOutputBufferPtr xmlOutBuffer
         = xmlOutputBufferCreateIO(funcWriteCallback, funcCloseCallback, mpImpl->mpStream, nullptr);
     mpImpl->mpWriter = xmlNewTextWriter(xmlOutBuffer);
     if (mpImpl->mpWriter == nullptr)
         return false;
     xmlTextWriterSetIndent(mpImpl->mpWriter, nIndent);
-    xmlTextWriterStartDocument(mpImpl->mpWriter, nullptr, "UTF-8", nullptr);
+    if (mpImpl->mbWriteXmlHeader)
+        xmlTextWriterStartDocument(mpImpl->mpWriter, nullptr, "UTF-8", nullptr);
     return true;
 }
 
 void XmlWriter::endDocument()
 {
-    xmlTextWriterEndDocument(mpImpl->mpWriter);
+    if (mpImpl->mbWriteXmlHeader)
+        xmlTextWriterEndDocument(mpImpl->mpWriter);
     xmlFreeTextWriter(mpImpl->mpWriter);
     mpImpl->mpWriter = nullptr;
 }
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index b401d811f596..0bc271576da7 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -448,6 +448,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
     vcl/source/fontsubset/sft \
     vcl/source/fontsubset/ttcr \
     vcl/source/fontsubset/xlat \
+    vcl/source/pdf/XmpMetadata \
     vcl/source/uitest/logger \
     vcl/source/uitest/uiobject \
     vcl/source/uitest/uitest \
diff --git a/vcl/inc/pdf/XmpMetadata.hxx b/vcl/inc/pdf/XmpMetadata.hxx
new file mode 100644
index 000000000000..d9f9cacc45b4
--- /dev/null
+++ b/vcl/inc/pdf/XmpMetadata.hxx
@@ -0,0 +1,47 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_VCL_INC_PDF_XMPMETADATA_HXX
+#define INCLUDED_VCL_INC_PDF_XMPMETADATA_HXX
+
+#include <vcl/dllapi.h>
+#include <rtl/string.hxx>
+#include <tools/stream.hxx>
+#include <memory>
+
+namespace vcl::pdf
+{
+class XmpMetadata
+{
+private:
+    bool mbWritten;
+    std::unique_ptr<SvMemoryStream> mpMemoryStream;
+
+public:
+    OString msTitle;
+    OString msAuthor;
+    OString msSubject;
+    OString msProducer;
+    OString msKeywords;
+    sal_Int32 mnPDF_A;
+
+public:
+    XmpMetadata();
+    sal_uInt64 getSize();
+    const void* getData();
+
+private:
+    void write();
+};
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index f5fb555b1f56..891d5db93a7b 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -74,6 +74,7 @@
 #include <textlineinfo.hxx>
 #include <bitmapwriteaccess.hxx>
 #include <impglyphitem.hxx>
+#include <pdf/XmpMetadata.hxx>
 
 #include "pdfwriter_impl.hxx"
 
@@ -5232,132 +5233,44 @@ sal_Int32 PDFWriterImpl::emitDocumentMetadata()
 
     if( updateObject( nObject ) )
     {
-        // the following string are written in UTF-8 unicode
-        OStringBuffer aMetadataStream( 8192 );
+        pdf::XmpMetadata aMetadata;
 
-        aMetadataStream.append( "<?xpacket begin=\"" );
-        // these lines write Unicode "zero width non-breaking space character" (U+FEFF)
-        // (aka byte-order mark ) used as a byte-order marker.
-        aMetadataStream.append( OUStringToOString( OUString( u'\xFEFF' ), RTL_TEXTENCODING_UTF8 ) );
-        aMetadataStream.append( "\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" );
-        aMetadataStream.append( "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n" );
-        aMetadataStream.append( " <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" );
-        //PDF/A part ( ISO 19005-1:2005 - 6.7.11 )
-        aMetadataStream.append( "  <rdf:Description rdf:about=\"\"\n" );
-        aMetadataStream.append( "      xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n" );
-        if( m_bIsPDF_A2 )
-        {
-            aMetadataStream.append( "   <pdfaid:part>2</pdfaid:part>\n" );
-            aMetadataStream.append( "   <pdfaid:conformance>B</pdfaid:conformance>\n" );
-        }
-        else
+        if (m_bIsPDF_A1)
+            aMetadata.mnPDF_A = 1;
+        else if (m_bIsPDF_A2)
+            aMetadata.mnPDF_A = 2;
+
+        if (!m_aContext.DocumentInfo.Title.isEmpty())
         {
-            aMetadataStream.append( "   <pdfaid:part>1</pdfaid:part>\n" );
-            aMetadataStream.append( "   <pdfaid:conformance>A</pdfaid:conformance>\n" );
+            OUString aTempString;
+            escapeStringXML(m_aContext.DocumentInfo.Title, aTempString);
+            aMetadata.msTitle = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
         }
-        aMetadataStream.append( "  </rdf:Description>\n" );
-        //... Dublin Core properties go here
-        if( !m_aContext.DocumentInfo.Title.isEmpty() ||
-            !m_aContext.DocumentInfo.Author.isEmpty() ||
-            !m_aContext.DocumentInfo.Subject.isEmpty() )
+        if (!m_aContext.DocumentInfo.Author.isEmpty())
         {
-            aMetadataStream.append( "  <rdf:Description rdf:about=\"\"\n" );
-            aMetadataStream.append( "      xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n" );
-            if( !m_aContext.DocumentInfo.Title.isEmpty() )
-            {
-                // this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
-                aMetadataStream.append( "   <dc:title>\n" );
-                aMetadataStream.append( "    <rdf:Alt>\n" );
-                aMetadataStream.append( "     <rdf:li xml:lang=\"x-default\">" );
-                OUString aTitle;
-                escapeStringXML( m_aContext.DocumentInfo.Title, aTitle );
-                aMetadataStream.append( OUStringToOString( aTitle, RTL_TEXTENCODING_UTF8 )  );
-                aMetadataStream.append( "</rdf:li>\n" );
-                aMetadataStream.append( "    </rdf:Alt>\n" );
-                aMetadataStream.append( "   </dc:title>\n" );
-            }
-            if( !m_aContext.DocumentInfo.Author.isEmpty() )
-            {
-                aMetadataStream.append( "   <dc:creator>\n" );
-                aMetadataStream.append( "    <rdf:Seq>\n" );
-                aMetadataStream.append( "     <rdf:li>" );
-                OUString aAuthor;
-                escapeStringXML( m_aContext.DocumentInfo.Author, aAuthor );
-                aMetadataStream.append( OUStringToOString( aAuthor , RTL_TEXTENCODING_UTF8 )  );
-                aMetadataStream.append( "</rdf:li>\n" );
-                aMetadataStream.append( "    </rdf:Seq>\n" );
-                aMetadataStream.append( "   </dc:creator>\n" );
-            }
-            if( !m_aContext.DocumentInfo.Subject.isEmpty() )
-            {
-                // this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
-                aMetadataStream.append( "   <dc:description>\n" );
-                aMetadataStream.append( "    <rdf:Alt>\n" );
-                aMetadataStream.append( "     <rdf:li xml:lang=\"x-default\">" );
-                OUString aSubject;
-                escapeStringXML( m_aContext.DocumentInfo.Subject, aSubject );
-                aMetadataStream.append( OUStringToOString( aSubject , RTL_TEXTENCODING_UTF8 )  );
-                aMetadataStream.append( "</rdf:li>\n" );
-                aMetadataStream.append( "    </rdf:Alt>\n" );
-                aMetadataStream.append( "   </dc:description>\n" );
-            }
-            aMetadataStream.append( "  </rdf:Description>\n" );
+            OUString aTempString;
+            escapeStringXML(m_aContext.DocumentInfo.Author, aTempString);
+            aMetadata.msAuthor = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
         }
-
-        //... PDF properties go here
-        if( !m_aContext.DocumentInfo.Producer.isEmpty() ||
-            !m_aContext.DocumentInfo.Keywords.isEmpty() )
+        if (!m_aContext.DocumentInfo.Subject.isEmpty())
         {
-            aMetadataStream.append( "  <rdf:Description rdf:about=\"\"\n" );
-            aMetadataStream.append( "     xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">\n" );
-            if( !m_aContext.DocumentInfo.Producer.isEmpty() )
-            {
-                aMetadataStream.append( "   <pdf:Producer>" );
-                OUString aProducer;
-                escapeStringXML( m_aContext.DocumentInfo.Producer, aProducer );
-                aMetadataStream.append( OUStringToOString( aProducer , RTL_TEXTENCODING_UTF8 )  );
-                aMetadataStream.append( "</pdf:Producer>\n" );
-            }
-            if( !m_aContext.DocumentInfo.Keywords.isEmpty() )
-            {
-                aMetadataStream.append( "   <pdf:Keywords>" );
-                OUString aKeywords;
-                escapeStringXML( m_aContext.DocumentInfo.Keywords, aKeywords );
-                aMetadataStream.append( OUStringToOString( aKeywords , RTL_TEXTENCODING_UTF8 )  );
-                aMetadataStream.append( "</pdf:Keywords>\n" );
-            }
-            aMetadataStream.append( "  </rdf:Description>\n" );
+            OUString aTempString;
+            escapeStringXML(m_aContext.DocumentInfo.Subject, aTempString);
+            aMetadata.msSubject = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
         }
-
-        aMetadataStream.append( "  <rdf:Description rdf:about=\"\"\n" );
-        aMetadataStream.append( "    xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">\n" );
-        if( !m_aContext.DocumentInfo.Creator.isEmpty() )
+        if (!m_aContext.DocumentInfo.Producer.isEmpty())
         {
-            aMetadataStream.append( "   <xmp:CreatorTool>" );
-            OUString aCreator;
-            escapeStringXML( m_aContext.DocumentInfo.Creator, aCreator );
-            aMetadataStream.append( OUStringToOString( aCreator , RTL_TEXTENCODING_UTF8 )  );
-            aMetadataStream.append( "</xmp:CreatorTool>\n" );
+            OUString aTempString;
+            escapeStringXML(m_aContext.DocumentInfo.Producer, aTempString);
+            aMetadata.msProducer = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
         }
-        //creation date
-        aMetadataStream.append( "   <xmp:CreateDate>" );
-        aMetadataStream.append( m_aCreationMetaDateString );
-        aMetadataStream.append( "</xmp:CreateDate>\n" );
-
-        aMetadataStream.append( "  </rdf:Description>\n" );
-        aMetadataStream.append( " </rdf:RDF>\n" );
-        aMetadataStream.append( "</x:xmpmeta>\n" );
-
-        //add the padding
-        for( sal_Int32 nSpaces = 1; nSpaces <= 2100; nSpaces++ )
+        if (!m_aContext.DocumentInfo.Keywords.isEmpty())
         {
-            aMetadataStream.append( " " );
-            if( nSpaces % 100 == 0 )
-                aMetadataStream.append( "\n" );
+            OUString aTempString;
+            escapeStringXML(m_aContext.DocumentInfo.Keywords, aTempString);
+            aMetadata.msKeywords = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
         }
 
-        aMetadataStream.append( "<?xpacket end=\"w\"?>\n" );
-
         OStringBuffer aMetadataObj( 1024 );
 
         aMetadataObj.append( nObject );
@@ -5365,12 +5278,12 @@ sal_Int32 PDFWriterImpl::emitDocumentMetadata()
 
         aMetadataObj.append( "<</Type/Metadata/Subtype/XML/Length " );
 
-        aMetadataObj.append( aMetadataStream.getLength() );
+        aMetadataObj.append( sal_Int32(aMetadata.getSize()) );
         aMetadataObj.append( ">>\nstream\n" );
         if ( !writeBuffer( aMetadataObj.getStr(), aMetadataObj.getLength() ) )
             return 0;
         //emit the stream
-        if ( !writeBuffer( aMetadataStream.getStr(), aMetadataStream.getLength() ) )
+        if ( !writeBuffer( aMetadata.getData(), aMetadata.getSize() ) )
             return 0;
 
         aMetadataObj.setLength( 0 );
diff --git a/vcl/source/pdf/XmpMetadata.cxx b/vcl/source/pdf/XmpMetadata.cxx
new file mode 100644
index 000000000000..d9033f4875ae
--- /dev/null
+++ b/vcl/source/pdf/XmpMetadata.cxx
@@ -0,0 +1,159 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <pdf/XmpMetadata.hxx>
+#include <tools/XmlWriter.hxx>
+
+namespace vcl::pdf
+{
+namespace
+{
+constexpr const char* constPadding = "                                        "
+                                     "                                        "
+                                     "                                        "
+                                     "                                        "
+                                     "                                        "
+                                     "\n";
+}
+
+XmpMetadata::XmpMetadata()
+    : mbWritten(false)
+    , mnPDF_A(0)
+{
+}
+
+void XmpMetadata::write()
+{
+    mpMemoryStream = std::make_unique<SvMemoryStream>(4096 /*Initial*/, 64 /*Resize*/);
+
+    // Header
+    mpMemoryStream->WriteOString("<?xpacket begin=\"");
+    mpMemoryStream->WriteOString(OUStringToOString(OUString(u'\xFEFF'), RTL_TEXTENCODING_UTF8));
+    mpMemoryStream->WriteOString("\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n");
+
+    {
+        tools::XmlWriter aXmlWriter(mpMemoryStream.get());
+        aXmlWriter.startDocument(2, false);
+        aXmlWriter.startElement("x", "xmpmeta", "adobe:ns:meta/");
+        aXmlWriter.startElement("rdf", "RDF", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+
+        // PDF/A part ( ISO 19005-1:2005 - 6.7.11 )
+        if (mnPDF_A > 0)
+        {
+            OString sPdfVersion = OString::number(mnPDF_A);
+            OString sPdfConformance = (mnPDF_A == 1) ? "A" : "B";
+
+            aXmlWriter.startElement("rdf:Description");
+            aXmlWriter.attribute("rdf:about", OString(""));
+            aXmlWriter.attribute("xmlns:pdfaid", OString("http://www.aiim.org/pdfa/ns/id/"));
+
+            aXmlWriter.startElement("pdfaid:part");
+            aXmlWriter.content(sPdfVersion);
+            aXmlWriter.endElement();
+
+            aXmlWriter.startElement("pdfaid:conformance");
+            aXmlWriter.content(sPdfConformance);
+            aXmlWriter.endElement();
+
+            aXmlWriter.endElement();
+        }
+
+        // Dublin Core properties
+        if (!msTitle.isEmpty() || !msAuthor.isEmpty() || !msSubject.isEmpty())
+        {
+            aXmlWriter.startElement("rdf:Description");
+            aXmlWriter.attribute("rdf:about", OString(""));
+            aXmlWriter.attribute("xmlns:dc", OString("http://purl.org/dc/elements/1.1/"));
+            if (!msTitle.isEmpty())
+            {
+                // this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
+                aXmlWriter.startElement("dc:title");
+                aXmlWriter.startElement("rdf:Alt");
+                aXmlWriter.startElement("rdf:li");
+                aXmlWriter.attribute("xml:lang", OString("x-default"));
+                aXmlWriter.content(msTitle);
+                aXmlWriter.endElement();
+                aXmlWriter.endElement();
+                aXmlWriter.endElement();
+            }
+            if (!msAuthor.isEmpty())
+            {
+                aXmlWriter.startElement("dc:creator");
+                aXmlWriter.startElement("rdf:Seq");
+                aXmlWriter.startElement("rdf:li");
+                aXmlWriter.content(msAuthor);
+                aXmlWriter.endElement();
+                aXmlWriter.endElement();
+                aXmlWriter.endElement();
+            }
+            if (!msSubject.isEmpty())
+            {
+                aXmlWriter.startElement("dc:description");
+                aXmlWriter.startElement("rdf:Alt");
+                aXmlWriter.startElement("rdf:li");
+                aXmlWriter.attribute("xml:lang", OString("x-default"));
+                aXmlWriter.content(msSubject);
+                aXmlWriter.endElement();
+                aXmlWriter.endElement();
+                aXmlWriter.endElement();
+            }
+            aXmlWriter.endElement();
+        }
+
+        // PDF properties
+        if (!msProducer.isEmpty() || !msKeywords.isEmpty())
+        {
+            aXmlWriter.startElement("rdf:Description");
+            aXmlWriter.attribute("rdf:about", OString(""));
+            aXmlWriter.attribute("xmlns:pdf", OString("http://ns.adobe.com/pdf/1.3/"));
+            if (!msProducer.isEmpty())
+            {
+                aXmlWriter.startElement("pdf:Producer");
+                aXmlWriter.content(msProducer);
+                aXmlWriter.endElement();
+            }
+            if (!msKeywords.isEmpty())
+            {
+                aXmlWriter.startElement("pdf:Keywords");
+                aXmlWriter.content(msKeywords);
+                aXmlWriter.endElement();
+            }
+            aXmlWriter.endElement();
+        }
+        aXmlWriter.endElement();
+        aXmlWriter.endElement();
+        aXmlWriter.endDocument();
+    }
+
+    // add padding (needed so the metadata can be changed in-place)
+    for (sal_Int32 nSpaces = 1; nSpaces <= 21; nSpaces++)
+        mpMemoryStream->WriteOString(constPadding);
+
+    mpMemoryStream->WriteOString("<?xpacket end=\"w\"?>\n");
+    mbWritten = true;
+}
+
+sal_uInt64 XmpMetadata::getSize()
+{
+    if (!mbWritten)
+        write();
+    return mpMemoryStream->GetSize();
+}
+
+const void* XmpMetadata::getData()
+{
+    if (!mbWritten)
+        write();
+    return mpMemoryStream->GetData();
+}
+
+} // end vcl::pdf
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */


More information about the Libreoffice-commits mailing list