[poppler] utils/ImageOutputDev.cc utils/ImageOutputDev.h utils/pdfimages.1 utils/pdfimages.cc

Adrian Johnson ajohnson at kemper.freedesktop.org
Fri Feb 17 15:27:29 PST 2012


 utils/ImageOutputDev.cc |  204 ++++++++++++++++++++++++++++++++++++++++++------
 utils/ImageOutputDev.h  |   20 ++++
 utils/pdfimages.1       |  101 +++++++++++++++++++++++
 utils/pdfimages.cc      |   12 +-
 4 files changed, 307 insertions(+), 30 deletions(-)

New commits:
commit 521d3740e9b7d2cfacf29f089a4a8f6c962de807
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Mon Feb 13 22:05:18 2012 +1030

    pdfimages: add -list option to list all images
    
    Bug 46066

diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc
index c043641..24ab1b9 100644
--- a/utils/ImageOutputDev.cc
+++ b/utils/ImageOutputDev.cc
@@ -44,19 +44,29 @@
 #include "Stream.h"
 #include "ImageOutputDev.h"
 
-ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA) {
-  fileRoot = copyString(fileRootA);
-  fileName = (char *)gmalloc(strlen(fileRoot) + 45);
+ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA) {
+  listImages = listImagesA;
+  if (!listImages) {
+    fileRoot = copyString(fileRootA);
+    fileName = (char *)gmalloc(strlen(fileRoot) + 45);
+  }
   dumpJPEG = dumpJPEGA;
   pageNames = pageNamesA;
   imgNum = 0;
   pageNum = 0;
   ok = gTrue;
+  if (listImages) {
+    printf("page   num  type   width height color comp bpc  enc interp  object ID\n");
+    printf("---------------------------------------------------------------------\n");
+  }
 }
 
+
 ImageOutputDev::~ImageOutputDev() {
-  gfree(fileName);
-  gfree(fileRoot);
+  if (!listImages) {
+    gfree(fileName);
+    gfree(fileRoot);
+  }
 }
 
 void ImageOutputDev::setFilename(const char *fileExt) {
@@ -67,18 +77,124 @@ void ImageOutputDev::setFilename(const char *fileExt) {
   }
 }
 
-GBool ImageOutputDev::tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
-				  double *pmat, int paintType, int tilingType, Dict *resDict,
-				  double *mat, double *bbox,
-				  int x0, int y0, int x1, int y1,
-				  double xStep, double yStep) {
-  return gTrue;
-  // do nothing -- this avoids the potentially slow loop in Gfx.cc
+void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str,
+			       int width, int height,
+			       GfxImageColorMap *colorMap,
+			       GBool interpolate, GBool inlineImg,
+			       ImageType imageType) {
+  const char *type;
+  const char *colorspace;
+  const char *enc;
+  int components, bpc;
+
+  printf("%4d %5d ", pageNum, imgNum);
+  type = "";
+  switch (imageType) {
+  case imgImage:
+    type = "image";
+    break;
+  case imgStencil:
+    type = "stencil";
+    break;
+  case imgMask:
+    type = "mask";
+    break;
+  case imgSmask:
+    type = "smask";
+    break;
+  }
+  printf("%-7s %5d %5d  ", type, width, height);
+
+  colorspace = "-";
+  /* masks and stencils default to ncomps = 1 and bpc = 1 */
+  components = 1;
+  bpc = 1;
+  if (colorMap && colorMap->isOk()) {
+    switch (colorMap->getColorSpace()->getMode()) {
+      case csDeviceGray:
+      case csCalGray:
+        colorspace = "gray";
+        break;
+      case csDeviceRGB:
+      case csCalRGB:
+        colorspace = "rgb";
+        break;
+      case csDeviceCMYK:
+        colorspace = "cmyk";
+        break;
+      case csLab:
+        colorspace = "lab";
+        break;
+      case csICCBased:
+        colorspace = "icc";
+        break;
+      case csIndexed:
+        colorspace = "index";
+        break;
+      case csSeparation:
+        colorspace = "sep";
+        break;
+      case csDeviceN:
+        colorspace = "devn";
+        break;
+      case csPattern:
+      default:
+        colorspace = "-";
+        break;
+    }
+    components = colorMap->getNumPixelComps();
+    bpc = colorMap->getBits();
+  }
+  printf("%-5s  %2d  %2d  ", colorspace, components, bpc);
+
+  switch (str->getKind()) {
+  case strCCITTFax:
+    enc = "ccitt";
+    break;
+  case strDCT:
+    enc = "jpeg";
+    break;
+  case strJPX:
+    enc = "jpx";
+    break;
+  case strJBIG2:
+    enc = "jbig2";
+    break;
+  case strFile:
+  case strFlate:
+  case strCachedFile:
+  case strASCIIHex:
+  case strASCII85:
+  case strLZW:
+  case strRunLength:
+  case strWeird:
+  default:
+    enc = "image";
+    break;
+  }
+  printf("%-5s  ", enc);
+
+  printf("%-3s  ", interpolate ? "yes" : "no");
+
+  if (inlineImg) {
+    printf("[inline]\n");
+  } else if (ref->isRef()) {
+    const Ref imageRef = ref->getRef();
+    if (imageRef.gen >= 100000) {
+      printf("[none]\n");
+    } else {
+      printf(" %6d %2d\n", imageRef.num, imageRef.gen);
+    }
+  } else {
+    printf("[none]\n");
+  }
+
+  ++imgNum;
 }
 
-void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
-				   int width, int height, GBool invert,
-				   GBool interpolate, GBool inlineImg) {
+void ImageOutputDev::writeMask(GfxState *state, Object *ref, Stream *str,
+			       int width, int height, GBool invert,
+			       GBool interpolate, GBool inlineImg) {
   FILE *f;
   int c;
   int size, i;
@@ -132,10 +248,10 @@ void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
   }
 }
 
-void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
-			       int width, int height,
-			       GfxImageColorMap *colorMap,
-			       GBool interpolate, int *maskColors, GBool inlineImg) {
+void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str,
+				int width, int height,
+				GfxImageColorMap *colorMap,
+				GBool interpolate, int *maskColors, GBool inlineImg) {
   FILE *f;
   ImageStream *imgStr;
   Guchar *p;
@@ -250,13 +366,46 @@ void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
   }
 }
 
+GBool ImageOutputDev::tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
+				  double *pmat, int paintType, int tilingType, Dict *resDict,
+				  double *mat, double *bbox,
+				  int x0, int y0, int x1, int y1,
+				  double xStep, double yStep) {
+  return gTrue;
+  // do nothing -- this avoids the potentially slow loop in Gfx.cc
+}
+
+void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
+				   int width, int height, GBool invert,
+				   GBool interpolate, GBool inlineImg) {
+  if (listImages)
+    listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgMask);
+  else
+    writeMask(state, ref, str, width, height, invert, interpolate, inlineImg);
+}
+
+void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
+			       int width, int height,
+			       GfxImageColorMap *colorMap,
+			       GBool interpolate, int *maskColors, GBool inlineImg) {
+  if (listImages)
+    listImage(state, ref, str, width, height, colorMap, interpolate, inlineImg, imgImage);
+  else
+    writeImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
+}
+
 void ImageOutputDev::drawMaskedImage(
   GfxState *state, Object *ref, Stream *str,
   int width, int height, GfxImageColorMap *colorMap, GBool interpolate,
   Stream *maskStr, int maskWidth, int maskHeight, GBool maskInvert, GBool maskInterpolate) {
-  drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
-  drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert,
-		maskInterpolate, gFalse);
+  if (listImages) {
+    listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage);
+    listImage(state, ref, maskStr, maskWidth, maskHeight, NULL, maskInterpolate, gFalse, imgMask); // mask row is described from maskStr so "enc" reflects the mask stream
+  } else {
+    drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
+    drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert,
+		  maskInterpolate, gFalse);
+  }
 }
 
 void ImageOutputDev::drawSoftMaskedImage(
@@ -264,7 +413,12 @@ void ImageOutputDev::drawSoftMaskedImage(
   int width, int height, GfxImageColorMap *colorMap, GBool interpolate,
   Stream *maskStr, int maskWidth, int maskHeight,
   GfxImageColorMap *maskColorMap, GBool maskInterpolate) {
-  drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
-  drawImage(state, ref, maskStr, maskWidth, maskHeight,
-	    maskColorMap, maskInterpolate, NULL, gFalse);
+  if (listImages) {
+    listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage);
+    listImage(state, ref, maskStr, maskWidth, maskHeight, maskColorMap, maskInterpolate, gFalse, imgSmask); // soft mask has its own dimensions; report maskHeight, not the image height
+  } else {
+    drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
+    drawImage(state, ref, maskStr, maskWidth, maskHeight,
+	      maskColorMap, maskInterpolate, NULL, gFalse);
+  }
 }
diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h
index fabe6b5..2689b68 100644
--- a/utils/ImageOutputDev.h
+++ b/utils/ImageOutputDev.h
@@ -44,13 +44,19 @@ class GfxState;
 
 class ImageOutputDev: public OutputDev {
 public:
+  enum ImageType {
+    imgImage,
+    imgStencil,
+    imgMask,
+    imgSmask
+  };
 
   // Create an OutputDev which will write images to files named
   // <fileRoot>-NNN.<type> or <fileRoot>-PPP-NNN.<type>, if 
   // <pageNames> is set. Normally, all images are written as PBM
   // (.pbm) or PPM (.ppm) files.  If <dumpJPEG> is set, JPEG images 
   // are written as JPEG (.jpg) files.
-  ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA);
+  ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA);
 
   // Destructor.
   virtual ~ImageOutputDev();
@@ -115,10 +121,22 @@ public:
 private:
   // Sets the output filename with a given file extension
   void setFilename(const char *fileExt);
+  void listImage(GfxState *state, Object *ref, Stream *str,
+		 int width, int height,
+		 GfxImageColorMap *colorMap,
+		 GBool interpolate, GBool inlineImg,
+		 ImageType imageType);
+  void writeMask(GfxState *state, Object *ref, Stream *str,
+		 int width, int height, GBool invert,
+		 GBool interpolate, GBool inlineImg);
+  void writeImage(GfxState *state, Object *ref, Stream *str,
+                  int width, int height, GfxImageColorMap *colorMap,
+                  GBool interpolate, int *maskColors, GBool inlineImg);
 
 
   char *fileRoot;		// root of output file names
   char *fileName;		// buffer for output file names
+  GBool listImages;		// list images instead of dumping
   GBool dumpJPEG;		// set to dump native JPEG files
   GBool pageNames;		// set to include page number in file names
   int pageNum;			// current page number
diff --git a/utils/pdfimages.1 b/utils/pdfimages.1
index 96116fa..955d8b3 100644
--- a/utils/pdfimages.1
+++ b/utils/pdfimages.1
@@ -35,6 +35,107 @@ Normally, all images are written as PBM (for monochrome images) or PPM
 format are saved as JPEG files.  All non-DCT images are saved in
 PBM/PPM format as usual.
 .TP
+.B \-list
+Instead of writing the images, list the images along with various information for each image. Do not specify an
+.IR image-root
+with this option.
+.IP
+The following information is listed for each image:
+.RS
+.TP
+.B page
+the page number containing the image
+.TP
+.B num
+the image number
+.TP
+.B type
+the image type:
+.PP
+.RS
+image - an opaque image
+.RE
+.RS
+mask - a monochrome mask image
+.RE
+.RS
+smask - a soft-mask image
+.RE
+.RS
+stencil - a monochrome mask image used for painting a color or pattern
+.RE
+.PP
+Note: Transparency in images is represented in PDF using a separate image for the image and the mask/smask.
+The mask/smask used as part of a transparent image always immediately follows the image in the image list.
+.TP
+.B width
+image width (in pixels)
+.TP
+.B height
+image height (in pixels)
+.PP
+Note: the image width/height is the size of the embedded image, not the size the image will be rendered at.
+.TP
+.B color
+image color space:
+.PP
+.RS
+gray - Gray
+.RE
+.RS
+rgb - RGB
+.RE
+.RS
+cmyk - CMYK
+.RE
+.RS
+lab - L*a*b*
+.RE
+.RS
+icc - ICC Based
+.RE
+.RS
+index - Indexed Color
+.RE
+.RS
+sep - Separation
+.RE
+.RS
+devn - DeviceN
+.RE
+.TP
+.B comp
+number of color components
+.TP
+.B bpc
+bits per component
+.TP
+.B enc
+encoding:
+.PP
+.RS
+image - raster image (may be Flate or LZW compressed but does not use an image encoding)
+.RE
+.RS
+jpeg - Joint Photographic Experts Group
+.RE
+.RS
+jpx - JPEG2000
+.RE
+.RS
+jbig2 - Joint Bi-Level Image Experts Group
+.RE
+.RS
+ccitt - CCITT Group 3 or Group 4 Fax
+.RE
+.TP
+.B interp
+"yes" if interpolation is to be performed when scaling up the image
+.TP
+.B object ID
+the image dictionary object ID (number and generation)
+.RE
+.TP
 .BI \-opw " password"
 Specify the owner password for the PDF file.  Providing this will
 bypass all security restrictions.
diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc
index 2383b6b..fdbd64d 100644
--- a/utils/pdfimages.cc
+++ b/utils/pdfimages.cc
@@ -48,6 +48,7 @@
 
 static int firstPage = 1;
 static int lastPage = 0;
+static GBool listImages = gFalse;
 static GBool dumpJPEG = gFalse;
 static GBool pageNames = gFalse;
 static char ownerPassword[33] = "\001";
@@ -63,6 +64,8 @@ static const ArgDesc argDesc[] = {
    "last page to convert"},
   {"-j",      argFlag,     &dumpJPEG,      0,
    "write JPEG images as JPEG files"},
+  {"-list",   argFlag,     &listImages,      0,
+   "print list of images instead of saving"},
   {"-opw",    argString,   ownerPassword,  sizeof(ownerPassword),
    "owner password (for encrypted files)"},
   {"-upw",    argString,   userPassword,   sizeof(userPassword),
@@ -87,7 +90,7 @@ static const ArgDesc argDesc[] = {
 int main(int argc, char *argv[]) {
   PDFDoc *doc;
   GooString *fileName;
-  char *imgRoot;
+  char *imgRoot = NULL;
   GooString *ownerPW, *userPW;
   ImageOutputDev *imgOut;
   GBool ok;
@@ -97,7 +100,7 @@ int main(int argc, char *argv[]) {
 
   // parse args
   ok = parseArgs(argDesc, &argc, argv);
-  if (!ok || argc != 3 || printVersion || printHelp) {
+  if (!ok || (listImages && argc != 2) || (!listImages && argc != 3) || printVersion || printHelp) {
     fprintf(stderr, "pdfimages version %s\n", PACKAGE_VERSION);
     fprintf(stderr, "%s\n", popplerCopyright);
     fprintf(stderr, "%s\n", xpdfCopyright);
@@ -109,7 +112,8 @@ int main(int argc, char *argv[]) {
     goto err0;
   }
   fileName = new GooString(argv[1]);
-  imgRoot = argv[2];
+  if (!listImages)
+    imgRoot = argv[2];
 
   // read config file
   globalParams = new GlobalParams();
@@ -163,7 +167,7 @@ int main(int argc, char *argv[]) {
     lastPage = doc->getNumPages();
 
   // write image files
-  imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG);
+  imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages);
   if (imgOut->isOk()) {
       doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0,
 			gTrue, gFalse, gFalse);


More information about the poppler mailing list