[poppler] poppler/Gfx.cc poppler/MarkedContentOutputDev.cc poppler/MarkedContentOutputDev.h poppler/OutputDev.h poppler/StructElement.cc poppler/StructElement.h

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Oct 9 10:31:15 UTC 2021


 poppler/Gfx.cc                    |    2 ++
 poppler/MarkedContentOutputDev.cc |   25 +++++++++++++++++++++++--
 poppler/MarkedContentOutputDev.h  |    8 +++++++-
 poppler/OutputDev.h               |    2 ++
 poppler/StructElement.cc          |   15 ++++++++++++---
 poppler/StructElement.h           |    3 ++-
 6 files changed, 48 insertions(+), 7 deletions(-)

New commits:
commit 32f27b888d0e89cd40c086bd8d70381ee474078c
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Mon Oct 4 10:53:54 2021 +1030

    StructElement: support MCID in XObjects

diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc
index d389f90d..c596b147 100644
--- a/poppler/Gfx.cc
+++ b/poppler/Gfx.cc
@@ -4120,7 +4120,9 @@ void Gfx::opXObject(Object args[], int numArgs)
             if (out->useDrawForm() && refObj.isRef()) {
                 out->drawForm(refObj.getRef());
             } else {
+                out->beginForm(refObj.getRef());
                 doForm(&obj1);
+                out->endForm(refObj.getRef());
             }
         }
         if (refObj.isRef() && shouldDoForm) {
diff --git a/poppler/MarkedContentOutputDev.cc b/poppler/MarkedContentOutputDev.cc
index 164c5cea..558af589 100644
--- a/poppler/MarkedContentOutputDev.cc
+++ b/poppler/MarkedContentOutputDev.cc
@@ -17,8 +17,9 @@
 #include "Annot.h"
 #include <vector>
 
-MarkedContentOutputDev::MarkedContentOutputDev(int mcidA) : currentFont(nullptr), currentText(nullptr), mcid(mcidA), pageWidth(0.0), pageHeight(0.0), unicodeMap(nullptr)
+MarkedContentOutputDev::MarkedContentOutputDev(int mcidA, const Object &stmObj) : currentFont(nullptr), currentText(nullptr), mcid(mcidA), pageWidth(0.0), pageHeight(0.0), unicodeMap(nullptr)
 {
+    stmRef = stmObj.copy();
     currentColor.r = currentColor.g = currentColor.b = 0;
 }
 
@@ -54,6 +55,26 @@ void MarkedContentOutputDev::endPage()
     pageWidth = pageHeight = 0.0;
 }
 
+void MarkedContentOutputDev::beginForm(Ref id) // Records entry into the form XObject identified by id.
+{
+    formStack.push_back(id); // back() is now this form; contentStreamMatch() compares it against stmRef
+}
+
+void MarkedContentOutputDev::endForm(Ref id) // Records leaving a form XObject; id is not consulted here.
+{
+    formStack.pop_back(); // drops the latest beginForm() entry; assumes begin/end calls are paired -- TODO confirm for every caller
+}
+
+bool MarkedContentOutputDev::contentStreamMatch() // True when the current form nesting agrees with the stream object given at construction.
+{
+    if (stmRef.isRef()) { // a stream reference was supplied: we must currently be inside that form
+        if (formStack.empty()) // not inside any form, so it cannot be the referenced one
+            return false;
+        return formStack.back() == stmRef.getRef(); // only the innermost form is compared
+    }
+    return formStack.empty(); // no stream reference: match only while outside every form
+}
+
 void MarkedContentOutputDev::beginMarkedContent(const char *name, Dict *properties)
 {
     int id = -1;
@@ -64,7 +85,7 @@ void MarkedContentOutputDev::beginMarkedContent(const char *name, Dict *properti
         return;
 
     // The stack keep track of MCIDs of nested marked content.
-    if (inMarkedContent() || id == mcid)
+    if (inMarkedContent() || (id == mcid && contentStreamMatch()))
         mcidStack.push_back(id);
 }
 
diff --git a/poppler/MarkedContentOutputDev.h b/poppler/MarkedContentOutputDev.h
index 43351ae4..ace886d0 100644
--- a/poppler/MarkedContentOutputDev.h
+++ b/poppler/MarkedContentOutputDev.h
@@ -88,7 +88,7 @@ typedef std::vector<TextSpan> TextSpanArray;
 class POPPLER_PRIVATE_EXPORT MarkedContentOutputDev : public OutputDev
 {
 public:
-    explicit MarkedContentOutputDev(int mcidA);
+    explicit MarkedContentOutputDev(int mcidA, const Object &stmObj);
     ~MarkedContentOutputDev() override;
 
     virtual bool isOk() { return true; }
@@ -101,6 +101,9 @@ public:
     void startPage(int pageNum, GfxState *state, XRef *xref) override;
     void endPage() override;
 
+    void beginForm(Ref id) override;
+    void endForm(Ref id) override;
+
     void drawChar(GfxState *state, double xx, double yy, double dx, double dy, double ox, double oy, CharCode c, int nBytes, const Unicode *u, int uLen) override;
 
     void beginMarkedContent(const char *name, Dict *properties) override;
@@ -111,6 +114,7 @@ public:
 private:
     void endSpan();
     bool inMarkedContent() const { return mcidStack.size() > 0; }
+    bool contentStreamMatch();
     bool needFontChange(const GfxFont *font) const;
 
     GfxFont *currentFont;
@@ -119,9 +123,11 @@ private:
     TextSpanArray textSpans;
     int mcid;
     std::vector<int> mcidStack;
+    std::vector<Ref> formStack;
     double pageWidth;
     double pageHeight;
     const UnicodeMap *unicodeMap;
+    Object stmRef;
 };
 
 #endif /* !MARKEDCONTENTOUTPUTDEV_H */
diff --git a/poppler/OutputDev.h b/poppler/OutputDev.h
index 3a8430c8..00b42e47 100644
--- a/poppler/OutputDev.h
+++ b/poppler/OutputDev.h
@@ -321,6 +321,8 @@ public:
 
     //----- form XObjects
     virtual void drawForm(Ref /*id*/) { }
+    virtual void beginForm(Ref /*id*/) { }
+    virtual void endForm(Ref /*id*/) { }
 
     //----- PostScript XObjects
     virtual void psXObject(Stream * /*psStream*/, Stream * /*level1Stream*/) { }
diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc
index 0a0f744f..2c703668 100644
--- a/poppler/StructElement.cc
+++ b/poppler/StructElement.cc
@@ -813,7 +813,7 @@ const Attribute *StructElement::findAttribute(Attribute::Type attributeType, boo
 GooString *StructElement::appendSubTreeText(GooString *string, bool recursive) const
 {
     if (isContent() && !isObjectRef()) {
-        MarkedContentOutputDev mcdev(getMCID());
+        MarkedContentOutputDev mcdev(getMCID(), stmRef);
         const TextSpanArray &spans(getTextSpansInternal(mcdev));
 
         if (!string)
@@ -1031,9 +1031,9 @@ StructElement *StructElement::parseChild(const Object *ref, Object *childObj, st
         child = new StructElement(childObj->getInt(), treeRoot, this);
     } else if (childObj->isDict("MCR")) {
         /*
-         * TODO: The optional Stm/StwOwn attributes are not handled, so all the
-         *      page will be always scanned when calling StructElement::getText().
+         * TODO: The optional StmOwn attribute is not handled.
          */
+
         Object mcidObj = childObj->dictLookup("MCID");
         if (!mcidObj.isInt()) {
             error(errSyntaxError, -1, "MCID object is wrong type ({0:s})", mcidObj.getTypeName());
@@ -1046,6 +1046,15 @@ StructElement *StructElement::parseChild(const Object *ref, Object *childObj, st
         if (pageRefObj.isRef()) {
             child->pageRef = std::move(pageRefObj);
         }
+
+        const Object &stmObj = childObj->dictLookupNF("Stm");
+        if (stmObj.isRef()) {
+            child->stmRef = stmObj.copy();
+        } else if (!stmObj.isNull()) {
+            error(errSyntaxError, -1, "Stm object is wrong type ({0:s})", stmObj.getTypeName());
+            return nullptr;
+        }
+
     } else if (childObj->isDict("OBJR")) {
         const Object &refObj = childObj->dictLookupNF("Obj");
         if (refObj.isRef()) {
diff --git a/poppler/StructElement.h b/poppler/StructElement.h
index 0d53e4ef..933be643 100644
--- a/poppler/StructElement.h
+++ b/poppler/StructElement.h
@@ -329,7 +329,7 @@ public:
     {
         if (!isContent())
             return TextSpanArray();
-        MarkedContentOutputDev mcdev(getMCID());
+        MarkedContentOutputDev mcdev(getMCID(), stmRef);
         return getTextSpansInternal(mcdev);
     }
 
@@ -379,6 +379,7 @@ private:
     StructTreeRoot *treeRoot;
     StructElement *parent;
     mutable Object pageRef;
+    Object stmRef;
 
     union {
         StructData *s;


More information about the poppler mailing list