[poppler] poppler/Parser.cc poppler/Parser.h

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Dec 11 16:02:23 UTC 2020


 poppler/Parser.cc |   50 ++++++++++++++++++++++++++++++++++----------------
 poppler/Parser.h  |    3 ++-
 2 files changed, 36 insertions(+), 17 deletions(-)

New commits:
commit acf4c8e1d1253f2c82c8e5ac009534b52deec88d
Author: Albert Astals Cid <aacid at kde.org>
Date:   Fri Dec 4 00:04:01 2020 +0100

    Don't decrypt the Contents field of Sig dictionaries
    
    I could not find where the PDF spec says that this field in particular
    is not encrypted, but I think it makes sense because of how you write it:
    you have to reserve space first, write the whole file, and then calculate
    the real signature and write it in the space you left blank before.
    
    If we encrypt the text, we can't know how long it'll be so we can't
    calculate how much space to leave available.
    
    Also, I have a PDF where the Contents field is not encrypted (but the
    rest of the document is) and Adobe opens it fine, so that seems to imply
    this is the right thing to do.

diff --git a/poppler/Parser.cc b/poppler/Parser.cc
index a01608c3..03cec1dc 100644
--- a/poppler/Parser.cc
+++ b/poppler/Parser.cc
@@ -67,14 +67,22 @@ Object Parser::getObj(int recursion)
     return getObj(false, nullptr, cryptRC4, 0, 0, 0, recursion);
 }
 
-Object Parser::getObj(bool simpleOnly, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, int recursion, bool strict)
+static std::unique_ptr<GooString> decryptedString(const GooString *s, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
+{
+    DecryptStream decrypt(new MemStream(s->c_str(), 0, s->getLength(), Object(objNull)), fileKey, encAlgorithm, keyLength, { objNum, objGen });
+    decrypt.reset();
+    std::unique_ptr<GooString> res = std::make_unique<GooString>();
+    int c;
+    while ((c = decrypt.getChar()) != EOF) {
+        res->append((char)c);
+    }
+    return res;
+}
+
+Object Parser::getObj(bool simpleOnly, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, int recursion, bool strict, bool decryptString)
 {
     Object obj;
     Stream *str;
-    DecryptStream *decrypt;
-    const GooString *s;
-    GooString *s2;
-    int c;
 
     // refill buffer after inline image data
     if (inlineImg == 2) {
@@ -108,6 +116,7 @@ Object Parser::getObj(bool simpleOnly, const unsigned char *fileKey, CryptAlgori
     } else if (!simpleOnly && buf1.isCmd("<<")) {
         shift(objNum);
         obj = Object(new Dict(lexer.getXRef()));
+        bool hasContentsEntry = false;
         while (!buf1.isCmd(">>") && !buf1.isEOF()) {
             if (!buf1.isName()) {
                 error(errSyntaxError, getPos(), "Dictionary key must be a name object");
@@ -123,7 +132,12 @@ Object Parser::getObj(bool simpleOnly, const unsigned char *fileKey, CryptAlgori
                         goto err;
                     break;
                 }
-                Object obj2 = getObj(false, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1);
+                // We don't decrypt strings that are the value of "Contents" key entries. We decrypt them if needed a few lines below.
+                // The "Contents" field of Sig dictionaries is not encrypted, but we can't know the type of the dictionary here yet
+                // so we don't decrypt any Contents and if later we find it's not a Sig dictionary we decrypt it
+                const bool isContents = !hasContentsEntry && key.isName("Contents");
+                hasContentsEntry = hasContentsEntry || isContents;
+                Object obj2 = getObj(false, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1, /*strict*/ false, /*decryptString*/ !isContents);
                 if (unlikely(obj2.isError() && recursion + 1 >= recursionLimit)) {
                     break;
                 }
@@ -135,6 +149,17 @@ Object Parser::getObj(bool simpleOnly, const unsigned char *fileKey, CryptAlgori
             if (strict)
                 goto err;
         }
+        if (fileKey && hasContentsEntry) {
+            Dict *dict = obj.getDict();
+            const bool isSigDict = dict->is("Sig");
+            if (!isSigDict) {
+                const Object &contentsObj = dict->lookupNF("Contents");
+                if (contentsObj.isString()) {
+                    std::unique_ptr<GooString> s = decryptedString(contentsObj.getString(), fileKey, encAlgorithm, keyLength, objNum, objGen);
+                    dict->set("Contents", Object(s.release()));
+                }
+            }
+        }
         // stream objects are not allowed inside content streams or
         // object streams
         if (buf2.isCmd("stream")) {
@@ -169,16 +194,9 @@ Object Parser::getObj(bool simpleOnly, const unsigned char *fileKey, CryptAlgori
         }
 
         // string
-    } else if (buf1.isString() && fileKey) {
-        s = buf1.getString();
-        s2 = new GooString();
-        decrypt = new DecryptStream(new MemStream(s->c_str(), 0, s->getLength(), Object(objNull)), fileKey, encAlgorithm, keyLength, { objNum, objGen });
-        decrypt->reset();
-        while ((c = decrypt->getChar()) != EOF) {
-            s2->append((char)c);
-        }
-        delete decrypt;
-        obj = Object(s2);
+    } else if (decryptString && buf1.isString() && fileKey) {
+        std::unique_ptr<GooString> s2 = decryptedString(buf1.getString(), fileKey, encAlgorithm, keyLength, objNum, objGen);
+        obj = Object(s2.release());
         shift();
 
         // simple object
diff --git a/poppler/Parser.h b/poppler/Parser.h
index 648d02cd..1a5fc643 100644
--- a/poppler/Parser.h
+++ b/poppler/Parser.h
@@ -49,7 +49,8 @@ public:
     // Get the next object from the input stream.  If <simpleOnly> is
     // true, do not parse compound objects (arrays, dictionaries, or
     // streams).
-    Object getObj(bool simpleOnly = false, const unsigned char *fileKey = nullptr, CryptAlgorithm encAlgorithm = cryptRC4, int keyLength = 0, int objNum = 0, int objGen = 0, int recursion = 0, bool strict = false);
+    Object getObj(bool simpleOnly = false, const unsigned char *fileKey = nullptr, CryptAlgorithm encAlgorithm = cryptRC4, int keyLength = 0, int objNum = 0, int objGen = 0, int recursion = 0, bool strict = false,
+                  bool decryptString = true);
 
     Object getObj(int recursion);
     template<typename T>


More information about the poppler mailing list