[Libreoffice-commits] core.git: xmlsecurity/inc xmlsecurity/source

Miklos Vajna vmiklos at collabora.co.uk
Fri Oct 28 17:12:30 UTC 2016


 xmlsecurity/inc/pdfio/pdfdocument.hxx    |    7 ++
 xmlsecurity/source/pdfio/pdfdocument.cxx |   78 +++++++++++++++----------------
 2 files changed, 46 insertions(+), 39 deletions(-)

New commits:
commit 5c33253daf23224424cf6b181a2c2235c4a82539
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Oct 28 18:33:47 2016 +0200

    xmlsecurity PDF verify: start using offsets from xref streams
    
    This is necessary, but not yet sufficient, to verify PDF 1.5 signatures:
    support for object streams is still missing.
    
    Change-Id: I5afec0a77839ffabe0aaa07e367064210535a1a9

diff --git a/xmlsecurity/inc/pdfio/pdfdocument.hxx b/xmlsecurity/inc/pdfio/pdfdocument.hxx
index 80a8de6..95663e6c 100644
--- a/xmlsecurity/inc/pdfio/pdfdocument.hxx
+++ b/xmlsecurity/inc/pdfio/pdfdocument.hxx
@@ -63,11 +63,17 @@ class XMLSECURITY_DLLPUBLIC PDFDocument
     std::map<size_t, size_t> m_aXRef;
     /// Object ID <-> "are changed as part of an incremental update?" map.
     std::map<size_t, bool> m_aXRefDirty;
+    /// Object offset <-> Object pointer map.
+    std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
+    /// Object ID <-> Object pointer map.
+    std::map<size_t, PDFObjectElement*> m_aIDObjects;
     /// List of xref offsets we know.
     std::vector<size_t> m_aStartXRefs;
     /// List of EOF offsets we know.
     std::vector<size_t> m_aEOFs;
     PDFTrailerElement* m_pTrailer;
+    /// When m_pTrailer is nullptr, this can still have a dictionary.
+    PDFObjectElement* m_pXRefStream;
     /// All editing takes place in this buffer, if it happens.
     SvMemoryStream m_aEditBuffer;
 
@@ -93,6 +99,7 @@ public:
     std::vector<PDFObjectElement*> GetPages();
     /// Remember the end location of an EOF token.
     void PushBackEOF(size_t nOffset);
+    const std::map<size_t, PDFObjectElement*>& GetIDObjects() const;
 
     /// Read elements from the start of the stream till its end.
     bool Read(SvStream& rStream);
diff --git a/xmlsecurity/source/pdfio/pdfdocument.cxx b/xmlsecurity/source/pdfio/pdfdocument.cxx
index 1fef027..4d6cbd4 100644
--- a/xmlsecurity/source/pdfio/pdfdocument.cxx
+++ b/xmlsecurity/source/pdfio/pdfdocument.cxx
@@ -100,7 +100,6 @@ public:
     PDFElement* Lookup(const OString& rDictionaryKey);
     PDFObjectElement* LookupObject(const OString& rDictionaryKey);
     double GetObjectValue() const;
-    double GetGenerationValue() const;
     void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
     sal_uInt64 GetDictionaryOffset();
     void SetDictionaryLength(sal_uInt64 nDictionaryLength);
@@ -277,7 +276,8 @@ public:
 };
 
 PDFDocument::PDFDocument()
-    : m_pTrailer(nullptr)
+    : m_pTrailer(nullptr),
+      m_pXRefStream(nullptr)
 {
 }
 
@@ -746,6 +746,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode)
                 {
                     bInStartXRef = false;
                     m_aStartXRefs.push_back(pNumberElement->GetValue());
+
+                    auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
+                    if (it != m_aOffsetObjects.end())
+                        m_pXRefStream = it->second;
                 }
             }
             else if (isalpha(ch))
@@ -776,6 +780,8 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode)
                     {
                         pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(), pGenerationNumber->GetValue());
                         m_aElements.push_back(std::unique_ptr<PDFElement>(pObject));
+                        m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
+                        m_aIDObjects[pObjectNumber->GetValue()] = pObject;
                     }
                     else
                     {
@@ -918,13 +924,9 @@ bool PDFDocument::Read(SvStream& rStream)
             return false;
         }
 
-        if (!m_pTrailer)
-        {
-            SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Read: found no trailer");
-            return false;
-        }
-
-        auto pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
+        PDFNumberElement* pPrev = nullptr;
+        if (m_pTrailer)
+            pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
         if (pPrev)
             nStartXRef = pPrev->GetValue();
 
@@ -933,6 +935,7 @@ bool PDFDocument::Read(SvStream& rStream)
         m_aStartXRefs.clear();
         m_aEOFs.clear();
         m_pTrailer = nullptr;
+        m_pXRefStream = nullptr;
         if (!pPrev)
             break;
     }
@@ -1197,6 +1200,7 @@ void PDFDocument::ReadXRefStream(SvStream& rStream)
             nStreamOffset = (nStreamOffset << 8) + nCh;
         }
 
+        // Generation number of the object.
         size_t nGenerationNumber = 0;
         nOffset = nPos;
         for (; nPos < nOffset + aW[2]; ++nPos)
@@ -1204,6 +1208,16 @@ void PDFDocument::ReadXRefStream(SvStream& rStream)
             unsigned char nCh = aFilteredLine[nPos];
             nGenerationNumber = (nGenerationNumber << 8) + nCh;
         }
+
+        // "n" entry of the xref table
+        if (nType == 1)
+        {
+            if (m_aXRef.find(nIndex) == m_aXRef.end())
+            {
+                m_aXRef[nIndex] = nStreamOffset;
+                m_aXRefDirty[nIndex] = false;
+            }
+        }
     }
 }
 
@@ -1346,17 +1360,21 @@ const std::vector< std::unique_ptr<PDFElement> >& PDFDocument::GetElements()
     return m_aElements;
 }
 
+const std::map<size_t, PDFObjectElement*>& PDFDocument::GetIDObjects() const
+{
+    return m_aIDObjects;
+}
+
 std::vector<PDFObjectElement*> PDFDocument::GetPages()
 {
     std::vector<PDFObjectElement*> aRet;
 
-    if (!m_pTrailer)
-    {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetPages: found no trailer");
-        return aRet;
-    }
+    PDFReferenceElement* pRoot = nullptr;
+    if (m_pTrailer)
+        pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
+    else if (m_pXRefStream)
+        pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
 
-    auto pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
     if (!pRoot)
     {
         SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetPages: trailer has no Root key");
@@ -1373,7 +1391,7 @@ std::vector<PDFObjectElement*> PDFDocument::GetPages()
     PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
     if (!pPages)
     {
-        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetPages: catalog has no pages");
+        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue() << ") has no pages");
         return aRet;
     }
 
@@ -2237,11 +2255,6 @@ double PDFObjectElement::GetObjectValue() const
     return m_fObjectValue;
 }
 
-double PDFObjectElement::GetGenerationValue() const
-{
-    return m_fGenerationValue;
-}
-
 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
 {
     m_nDictionaryOffset = nDictionaryOffset;
@@ -2395,25 +2408,12 @@ double PDFReferenceElement::LookupNumber(SvStream& rStream) const
 
 PDFObjectElement* PDFReferenceElement::LookupObject() const
 {
-    const std::vector< std::unique_ptr<PDFElement> >& rElements = m_rDoc.GetElements();
-    // Iterate in reverse order, so in case an incremental update adds a newer
-    // version, we find it.
-    for (int i = rElements.size() - 1; i >= 0; --i)
-    {
-        const std::unique_ptr<PDFElement>& rElement = rElements[i];
-        auto* pObjectElement = dynamic_cast<PDFObjectElement*>(rElement.get());
-        if (!pObjectElement)
-            continue;
-
-        if (pObjectElement->GetObjectValue() != m_fObjectValue)
-            continue;
-
-        if (pObjectElement->GetGenerationValue() != m_fGenerationValue)
-            continue;
-
-        return pObjectElement;
-    }
+    const std::map<size_t, PDFObjectElement*>& rIDObjects = m_rDoc.GetIDObjects();
+    auto it = rIDObjects.find(m_fObjectValue);
+    if (it != rIDObjects.end())
+        return it->second;
 
+    SAL_WARN("xmlsecurity.pdfio", "PDFReferenceElement::LookupObject: can't find obj " << m_fObjectValue);
     return nullptr;
 }
 


More information about the Libreoffice-commits mailing list