[poppler] poppler/Gfx.cc poppler/Stream.cc poppler/Stream.h utils/ImageOutputDev.cc utils/ImageOutputDev.h
Adrian Johnson
ajohnson at kemper.freedesktop.org
Wed Aug 16 21:35:26 UTC 2017
poppler/Gfx.cc | 2 -
poppler/Stream.cc | 96 ++++++++++++++++++++++++++++++++++++++++++------
poppler/Stream.h | 14 ++++++-
utils/ImageOutputDev.cc | 67 ++++++++++++++++++++++++++++++---
utils/ImageOutputDev.h | 1
5 files changed, 159 insertions(+), 21 deletions(-)
New commits:
commit 488d28ec9507eb99c7cb4cd2cafb54995a8bc9f8
Author: Adrian Johnson <ajohnson at redneon.com>
Date: Wed Aug 16 21:01:07 2017 +0930
pdfimages: support listing/extracting inline images
The difficulty with extracting inline images is that inline images do
not provide any way of determining the length or end of image data
without decoding the image. We can get the length by using ImageStream
to decode the data and then checking the stream position. But then we are
still unable to extract the undecoded image data because embedded
streams can only be read once.
Since inline images tend to be small, the solution implemented is to
modify EmbedStream to keep a copy of the data read from it in memory
and then allow the data to be read again.
Two new functions have been added to EmbedStream. rewind() will cause
EmbedStream.getChar() to stop recording data and switch to replaying
the saved data, returning EOF when the end of the saved data is
reached. The restore() function will make getChar() switch back to
reading from the parent stream.
ImageOutputDev can now extract an inline image, or get its size, by first using
ImageStream to read data from the embedded stream. After calling
rewind() the undecoded image data can be read from the embedded stream
until EOF is returned. Then restore() is called so that Gfx can read
the 'EI' from the end of the embedded stream.
Bug 25625
diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc
index be9810e1..2bfc1ecd 100644
--- a/poppler/Gfx.cc
+++ b/poppler/Gfx.cc
@@ -4901,7 +4901,7 @@ Stream *Gfx::buildImageStream() {
// make stream
if (parser->getStream()) {
- str = new EmbedStream(parser->getStream(), std::move(dict), gFalse, 0);
+ str = new EmbedStream(parser->getStream(), std::move(dict), gFalse, 0, gTrue);
str = str->addFilters(str->getDict());
} else {
str = NULL;
diff --git a/poppler/Stream.cc b/poppler/Stream.cc
index df767652..9cb48abc 100644
--- a/poppler/Stream.cc
+++ b/poppler/Stream.cc
@@ -1033,14 +1033,25 @@ void MemStream::moveStart(Goffset delta) {
//------------------------------------------------------------------------
EmbedStream::EmbedStream(Stream *strA, Object &&dictA,
- GBool limitedA, Goffset lengthA):
+ GBool limitedA, Goffset lengthA, GBool reusableA):
BaseStream(std::move(dictA), lengthA) {
str = strA;
limited = limitedA;
length = lengthA;
+ reusable = reusableA;
+ record = gFalse;
+ replay = gFalse;
+ if (reusable) {
+ bufData = (unsigned char*)gmalloc(16384);
+ bufMax = 16384;
+ bufLen = 0;
+ record = gTrue;
+ }
}
EmbedStream::~EmbedStream() {
+ if (reusable)
+ gfree(bufData);
}
BaseStream *EmbedStream::copy() {
@@ -1054,31 +1065,94 @@ Stream *EmbedStream::makeSubStream(Goffset start, GBool limitedA,
return NULL;
}
+void EmbedStream::rewind() {
+ record = gFalse;
+ replay = gTrue;
+ bufPos = 0;
+}
+
+void EmbedStream::restore() {
+ replay = gFalse;
+}
+
+Goffset EmbedStream::getPos() {
+ if (replay)
+ return bufPos;
+ else
+ return str->getPos();
+}
+
int EmbedStream::getChar() {
- if (limited && !length) {
- return EOF;
+ if (replay) {
+ if (bufPos < bufLen)
+ return bufData[bufPos++];
+ else
+ return EOF;
+ } else {
+ if (limited && !length) {
+ return EOF;
+ }
+ int c = str->getChar();
+ --length;
+ if (record) {
+ bufData[bufLen] = c;
+ bufLen++;
+ if (bufLen >= bufMax) {
+ bufMax *= 2;
+ bufData = (unsigned char *)grealloc(bufData, bufMax);
+ }
+ }
+ return c;
}
- --length;
- return str->getChar();
}
int EmbedStream::lookChar() {
- if (limited && !length) {
- return EOF;
+ if (replay) {
+ if (bufPos < bufLen)
+ return bufData[bufPos];
+ else
+ return EOF;
+ } else {
+ if (limited && !length) {
+ return EOF;
+ }
+ return str->lookChar();
}
- return str->lookChar();
}
int EmbedStream::getChars(int nChars, Guchar *buffer) {
+ int len;
+
if (nChars <= 0) {
return 0;
}
- if (limited && length < nChars) {
- nChars = length;
+ if (replay) {
+ if (bufPos >= bufLen)
+ return EOF;
+ len = bufLen - bufPos;
+ if (nChars > len)
+ nChars = len;
+ memcpy(buffer, bufData, len);
+ return len;
+ } else {
+ if (limited && length < nChars) {
+ nChars = length;
+ }
+ len = str->doGetChars(nChars, buffer);
+ if (record) {
+ if (bufLen + len >= bufMax) {
+ while (bufLen + len >= bufMax)
+ bufMax *= 2;
+ bufData = (unsigned char *)grealloc(bufData, bufMax);
+ }
+ memcpy(bufData+bufLen, buffer, len);
+ bufLen += len;
+ }
}
- return str->doGetChars(nChars, buffer);
+ return len;
}
+
void EmbedStream::setPos(Goffset pos, int dir) {
error(errInternal, -1, "Internal: called setPos() on EmbedStream");
}
diff --git a/poppler/Stream.h b/poppler/Stream.h
index 2317080e..7e67697c 100644
--- a/poppler/Stream.h
+++ b/poppler/Stream.h
@@ -607,7 +607,7 @@ private:
class EmbedStream: public BaseStream {
public:
- EmbedStream(Stream *strA, Object &&dictA, GBool limitedA, Goffset lengthA);
+ EmbedStream(Stream *strA, Object &&dictA, GBool limitedA, Goffset lengthA, GBool reusableA = gFalse);
~EmbedStream();
BaseStream *copy() override;
Stream *makeSubStream(Goffset start, GBool limitedA,
@@ -616,7 +616,7 @@ public:
void reset() override {}
int getChar() override;
int lookChar() override;
- Goffset getPos() override { return str->getPos(); }
+ Goffset getPos() override;
void setPos(Goffset pos, int dir = 0) override;
Goffset getStart() override;
void moveStart(Goffset delta) override;
@@ -624,6 +624,8 @@ public:
int getUnfilteredChar () override { return str->getUnfilteredChar(); }
void unfilteredReset () override { str->unfilteredReset(); }
+ void rewind();
+ void restore();
private:
@@ -632,6 +634,14 @@ private:
Stream *str;
GBool limited;
+ GBool reusable;
+ GBool record;
+ GBool replay;
+ unsigned char *bufData;
+ long bufMax;
+ long bufLen;
+ long bufPos;
+
};
//------------------------------------------------------------------------
diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc
index f6fb35dd..33cbb714 100644
--- a/utils/ImageOutputDev.cc
+++ b/utils/ImageOutputDev.cc
@@ -246,7 +246,9 @@ void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str,
printf("%5.0f ", yppi);
Goffset embedSize = -1;
- if (!inlineImg)
+ if (inlineImg)
+ embedSize = getInlineImageLength(str, width, height, colorMap);
+ else
embedSize = str->getBaseStream()->getLength();
long long imageSize = 0;
@@ -311,6 +313,43 @@ void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str,
}
}
+long ImageOutputDev::getInlineImageLength(Stream *str, int width, int height,
+ GfxImageColorMap *colorMap) {
+ long len;
+
+ if (colorMap) {
+ ImageStream *imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(),
+ colorMap->getBits());
+ imgStr->reset();
+ for (int y = 0; y < height; y++)
+ imgStr->getLine();
+
+ imgStr->close();
+ delete imgStr;
+ } else {
+ str->reset();
+ for (int y = 0; y < height; y++) {
+ int size = (width + 7)/8;
+ for (int x = 0; x < size; x++)
+ str->getChar();
+ }
+ }
+
+ EmbedStream *embedStr = (EmbedStream *) (str->getBaseStream());
+ embedStr->rewind();
+ if (str->getKind() == strDCT || str->getKind() == strCCITTFax)
+ str = str->getNextStream();
+ len = 0;
+ str->reset();
+ while (str->getChar() != EOF)
+ len++;
+
+ embedStr->restore();
+
+
+ return len;
+}
+
void ImageOutputDev::writeRawImage(Stream *str, const char *ext) {
FILE *f;
int c;
@@ -498,15 +537,21 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap, GBool inlineImg) {
ImageFormat format;
+ EmbedStream *embedStr;
- if (dumpJPEG && str->getKind() == strDCT &&
- (colorMap->getNumPixelComps() == 1 ||
- colorMap->getNumPixelComps() == 3) &&
- !inlineImg) {
+ if (dumpJPEG && str->getKind() == strDCT) {
+ if (inlineImg) {
+ embedStr = (EmbedStream *) (str->getBaseStream());
+ getInlineImageLength(str, width, height, colorMap); // record the strean
+ embedStr->rewind();
+ }
// dump JPEG file
writeRawImage(str, "jpg");
+ if (inlineImg)
+ embedStr->restore();
+
} else if (dumpJP2 && str->getKind() == strJPX && !inlineImg) {
// dump JPEG2000 file
writeRawImage(str, "jp2");
@@ -535,7 +580,7 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str,
// dump JBIG2 embedded file
writeRawImage(str, "jb2e");
- } else if (dumpCCITT && str->getKind() == strCCITTFax && !inlineImg) {
+ } else if (dumpCCITT && str->getKind() == strCCITTFax) {
// write CCITT parameters
CCITTFaxStream *ccittStr = static_cast<CCITTFaxStream *>(str);
FILE *f;
@@ -567,14 +612,22 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str,
fclose(f);
+ if (inlineImg) {
+ embedStr = (EmbedStream *) (str->getBaseStream());
+ getInlineImageLength(str, width, height, colorMap); // record the strean
+ embedStr->rewind();
+ }
+
// dump CCITT file
writeRawImage(str, "ccitt");
+ if (inlineImg)
+ embedStr->restore();
+
} else if (outputPNG && !(outputTiff && colorMap &&
(colorMap->getColorSpace()->getMode() == csDeviceCMYK ||
(colorMap->getColorSpace()->getMode() == csICCBased &&
colorMap->getNumPixelComps() == 4)))) {
-
// output in PNG format
#if ENABLE_LIBPNG
diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h
index 22954cf0..baccd8ef 100644
--- a/utils/ImageOutputDev.h
+++ b/utils/ImageOutputDev.h
@@ -160,6 +160,7 @@ private:
void writeRawImage(Stream *str, const char *ext);
void writeImageFile(ImgWriter *writer, ImageFormat format, const char *ext,
Stream *str, int width, int height, GfxImageColorMap *colorMap);
+ long getInlineImageLength(Stream *str, int width, int height, GfxImageColorMap *colorMap);
char *fileRoot; // root of output file names
char *fileName; // buffer for output file names
More information about the poppler mailing list