[poppler] poppler/Gfx.cc poppler/MarkedContentOutputDev.cc poppler/MarkedContentOutputDev.h poppler/OutputDev.h poppler/StructElement.cc poppler/StructElement.h
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Sat Oct 9 10:31:15 UTC 2021
poppler/Gfx.cc | 2 ++
poppler/MarkedContentOutputDev.cc | 25 +++++++++++++++++++++++--
poppler/MarkedContentOutputDev.h | 8 +++++++-
poppler/OutputDev.h | 2 ++
poppler/StructElement.cc | 15 ++++++++++++---
poppler/StructElement.h | 3 ++-
6 files changed, 48 insertions(+), 7 deletions(-)
New commits:
commit 32f27b888d0e89cd40c086bd8d70381ee474078c
Author: Adrian Johnson <ajohnson at redneon.com>
Date: Mon Oct 4 10:53:54 2021 +1030
StructElement: support MCID in XObjects
diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc
index d389f90d..c596b147 100644
--- a/poppler/Gfx.cc
+++ b/poppler/Gfx.cc
@@ -4120,7 +4120,9 @@ void Gfx::opXObject(Object args[], int numArgs)
if (out->useDrawForm() && refObj.isRef()) {
out->drawForm(refObj.getRef());
} else {
+ out->beginForm(refObj.getRef());
doForm(&obj1);
+ out->endForm(refObj.getRef());
}
}
if (refObj.isRef() && shouldDoForm) {
diff --git a/poppler/MarkedContentOutputDev.cc b/poppler/MarkedContentOutputDev.cc
index 164c5cea..558af589 100644
--- a/poppler/MarkedContentOutputDev.cc
+++ b/poppler/MarkedContentOutputDev.cc
@@ -17,8 +17,9 @@
#include "Annot.h"
#include <vector>
-MarkedContentOutputDev::MarkedContentOutputDev(int mcidA) : currentFont(nullptr), currentText(nullptr), mcid(mcidA), pageWidth(0.0), pageHeight(0.0), unicodeMap(nullptr)
+MarkedContentOutputDev::MarkedContentOutputDev(int mcidA, const Object &stmObj) : currentFont(nullptr), currentText(nullptr), mcid(mcidA), pageWidth(0.0), pageHeight(0.0), unicodeMap(nullptr)
{
+ stmRef = stmObj.copy();
currentColor.r = currentColor.g = currentColor.b = 0;
}
@@ -54,6 +55,26 @@ void MarkedContentOutputDev::endPage()
pageWidth = pageHeight = 0.0;
}
+void MarkedContentOutputDev::beginForm(Ref id)
+{
+ formStack.push_back(id);
+}
+
+void MarkedContentOutputDev::endForm(Ref id)
+{
+ formStack.pop_back();
+}
+
+bool MarkedContentOutputDev::contentStreamMatch()
+{
+ if (stmRef.isRef()) {
+ if (formStack.empty())
+ return false;
+ return formStack.back() == stmRef.getRef();
+ }
+ return formStack.empty();
+}
+
void MarkedContentOutputDev::beginMarkedContent(const char *name, Dict *properties)
{
int id = -1;
@@ -64,7 +85,7 @@ void MarkedContentOutputDev::beginMarkedContent(const char *name, Dict *properti
return;
// The stack keep track of MCIDs of nested marked content.
- if (inMarkedContent() || id == mcid)
+ if (inMarkedContent() || (id == mcid && contentStreamMatch()))
mcidStack.push_back(id);
}
diff --git a/poppler/MarkedContentOutputDev.h b/poppler/MarkedContentOutputDev.h
index 43351ae4..ace886d0 100644
--- a/poppler/MarkedContentOutputDev.h
+++ b/poppler/MarkedContentOutputDev.h
@@ -88,7 +88,7 @@ typedef std::vector<TextSpan> TextSpanArray;
class POPPLER_PRIVATE_EXPORT MarkedContentOutputDev : public OutputDev
{
public:
- explicit MarkedContentOutputDev(int mcidA);
+ explicit MarkedContentOutputDev(int mcidA, const Object &stmObj);
~MarkedContentOutputDev() override;
virtual bool isOk() { return true; }
@@ -101,6 +101,9 @@ public:
void startPage(int pageNum, GfxState *state, XRef *xref) override;
void endPage() override;
+ void beginForm(Ref id) override;
+ void endForm(Ref id) override;
+
void drawChar(GfxState *state, double xx, double yy, double dx, double dy, double ox, double oy, CharCode c, int nBytes, const Unicode *u, int uLen) override;
void beginMarkedContent(const char *name, Dict *properties) override;
@@ -111,6 +114,7 @@ public:
private:
void endSpan();
bool inMarkedContent() const { return mcidStack.size() > 0; }
+ bool contentStreamMatch();
bool needFontChange(const GfxFont *font) const;
GfxFont *currentFont;
@@ -119,9 +123,11 @@ private:
TextSpanArray textSpans;
int mcid;
std::vector<int> mcidStack;
+ std::vector<Ref> formStack;
double pageWidth;
double pageHeight;
const UnicodeMap *unicodeMap;
+ Object stmRef;
};
#endif /* !MARKEDCONTENTOUTPUTDEV_H */
diff --git a/poppler/OutputDev.h b/poppler/OutputDev.h
index 3a8430c8..00b42e47 100644
--- a/poppler/OutputDev.h
+++ b/poppler/OutputDev.h
@@ -321,6 +321,8 @@ public:
//----- form XObjects
virtual void drawForm(Ref /*id*/) { }
+ virtual void beginForm(Ref /*id*/) { }
+ virtual void endForm(Ref /*id*/) { }
//----- PostScript XObjects
virtual void psXObject(Stream * /*psStream*/, Stream * /*level1Stream*/) { }
diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc
index 0a0f744f..2c703668 100644
--- a/poppler/StructElement.cc
+++ b/poppler/StructElement.cc
@@ -813,7 +813,7 @@ const Attribute *StructElement::findAttribute(Attribute::Type attributeType, boo
GooString *StructElement::appendSubTreeText(GooString *string, bool recursive) const
{
if (isContent() && !isObjectRef()) {
- MarkedContentOutputDev mcdev(getMCID());
+ MarkedContentOutputDev mcdev(getMCID(), stmRef);
const TextSpanArray &spans(getTextSpansInternal(mcdev));
if (!string)
@@ -1031,9 +1031,9 @@ StructElement *StructElement::parseChild(const Object *ref, Object *childObj, st
child = new StructElement(childObj->getInt(), treeRoot, this);
} else if (childObj->isDict("MCR")) {
/*
- * TODO: The optional Stm/StwOwn attributes are not handled, so all the
- * page will be always scanned when calling StructElement::getText().
+ * TODO: The optional StmOwn attribute is not handled.
*/
+
Object mcidObj = childObj->dictLookup("MCID");
if (!mcidObj.isInt()) {
error(errSyntaxError, -1, "MCID object is wrong type ({0:s})", mcidObj.getTypeName());
@@ -1046,6 +1046,15 @@ StructElement *StructElement::parseChild(const Object *ref, Object *childObj, st
if (pageRefObj.isRef()) {
child->pageRef = std::move(pageRefObj);
}
+
+ const Object &stmObj = childObj->dictLookupNF("Stm");
+ if (stmObj.isRef()) {
+ child->stmRef = stmObj.copy();
+ } else if (!stmObj.isNull()) {
+ error(errSyntaxError, -1, "Stm object is wrong type ({0:s})", stmObj.getTypeName());
+ return nullptr;
+ }
+
} else if (childObj->isDict("OBJR")) {
const Object &refObj = childObj->dictLookupNF("Obj");
if (refObj.isRef()) {
diff --git a/poppler/StructElement.h b/poppler/StructElement.h
index 0d53e4ef..933be643 100644
--- a/poppler/StructElement.h
+++ b/poppler/StructElement.h
@@ -329,7 +329,7 @@ public:
{
if (!isContent())
return TextSpanArray();
- MarkedContentOutputDev mcdev(getMCID());
+ MarkedContentOutputDev mcdev(getMCID(), stmRef);
return getTextSpansInternal(mcdev);
}
@@ -379,6 +379,7 @@ private:
StructTreeRoot *treeRoot;
StructElement *parent;
mutable Object pageRef;
+ Object stmRef;
union {
StructData *s;
More information about the poppler
mailing list