[poppler] CMakeLists.txt glib/poppler-document.cc glib/poppler-document.h glib/reference poppler/JSInfo.cc poppler/JSInfo.h poppler/PDFDoc.cc poppler/PDFDoc.h utils/CMakeLists.txt utils/JSInfo.cc utils/JSInfo.h

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 30 19:43:23 UTC 2020


 CMakeLists.txt                      |    2 ++
 glib/poppler-document.cc            |   16 ++++++++++++++++
 glib/poppler-document.h             |    3 +++
 glib/reference/poppler-sections.txt |    1 +
 poppler/JSInfo.cc                   |   30 ++++++++++++++++++++++++++++++
 poppler/JSInfo.h                    |    4 ++++
 poppler/PDFDoc.cc                   |    7 +++++++
 poppler/PDFDoc.h                    |    2 ++
 utils/CMakeLists.txt                |    4 ----
 9 files changed, 65 insertions(+), 4 deletions(-)

New commits:
commit 4b9a643e7308852f1bc6e5932287c313e14416a5
Author: Nelson Benítez León <nbenitezl at gmail.com>
Date:   Thu Jun 18 15:31:23 2020 -0400

    Move utils/JSInfo.cc utils/JSInfo.h to core poppler
    
    and add new JSInfo::scanJS() variant that returns
    immediately after finding JS. This variant is used
    by newly added PDFDoc::hasJavascript() method which
    is in turn used by newly added poppler-glib function
    poppler_document_has_javascript()

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 34eeb266..3a44f7ac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -365,6 +365,7 @@ set(poppler_SRCS
   poppler/Hints.cc
   poppler/JArithmeticDecoder.cc
   poppler/JBIG2Stream.cc
+  poppler/JSInfo.cc
   poppler/Lexer.cc
   poppler/Link.cc
   poppler/Linearization.cc
@@ -574,6 +575,7 @@ if(ENABLE_UNSTABLE_API_ABI_HEADERS)
     poppler/Hints.h
     poppler/JArithmeticDecoder.h
     poppler/JBIG2Stream.h
+    poppler/JSInfo.h
     poppler/Lexer.h
     poppler/Link.h
     poppler/Linearization.h
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 0501cf19..eaa8e5d7 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -2040,6 +2040,22 @@ poppler_document_reset_form (PopplerDocument *document,
   }
 }
 
+/**
+ * poppler_document_has_javascript:
+ * @document: A #PopplerDocument
+ *
+ * Returns: %TRUE if @document contains any JavaScript, %FALSE otherwise
+ *
+ * Since: 0.90
+ **/
+gboolean
+poppler_document_has_javascript (PopplerDocument *document)
+{
+  g_return_val_if_fail (POPPLER_IS_DOCUMENT (document), FALSE);
+
+  return document->doc->hasJavascript();
+}
+
 static void
 poppler_document_get_property (GObject    *object,
 			       guint       prop_id,
diff --git a/glib/poppler-document.h b/glib/poppler-document.h
index 570504f2..e0753a0d 100644
--- a/glib/poppler-document.h
+++ b/glib/poppler-document.h
@@ -430,6 +430,9 @@ POPPLER_PUBLIC
 void               poppler_document_reset_form             (PopplerDocument  *document,
                                                             GList            *fields,
                                                             gboolean          exclude_fields);
+/* Javascript */
+POPPLER_PUBLIC
+gboolean           poppler_document_has_javascript         (PopplerDocument  *document);
 
 /* Interface for getting the Index of a poppler_document */
 #define POPPLER_TYPE_INDEX_ITER                 (poppler_index_iter_get_type ())
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index 280b1742..93c884e3 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -187,6 +187,7 @@ poppler_document_get_producer
 poppler_document_get_subject
 poppler_document_get_title
 poppler_document_has_attachments
+poppler_document_has_javascript
 poppler_document_is_linearized
 poppler_document_new_from_bytes
 poppler_document_new_from_data
diff --git a/utils/JSInfo.cc b/poppler/JSInfo.cc
similarity index 94%
rename from utils/JSInfo.cc
rename to poppler/JSInfo.cc
index 08b47693..4d3f5919 100644
--- a/utils/JSInfo.cc
+++ b/poppler/JSInfo.cc
@@ -87,6 +87,7 @@ void JSInfo::scanLinkAction(LinkAction *link, const char *action) {
 void JSInfo::scanJS(int nPages) {
   print = false;
   file = nullptr;
+  onlyFirstJS = false;
   scan(nPages);
 }
 
@@ -94,6 +95,14 @@ void JSInfo::scanJS(int nPages, FILE *fout, const UnicodeMap *uMap) {
   print = true;
   file = fout;
   uniMap = uMap;
+  onlyFirstJS = false;
+  scan(nPages);
+}
+
+void JSInfo::scanJS(int nPages, bool stopOnFirstJS) {
+  print = false;
+  file = nullptr;
+  onlyFirstJS = stopOnFirstJS;
   scan(nPages);
 }
 
@@ -108,6 +117,9 @@ void JSInfo::scan(int nPages) {
   int numNames = doc->getCatalog()->numJS();
   if (numNames > 0) {
     hasJS = true;
+    if (onlyFirstJS) {
+      return;
+    }
     if (print) {
       for (int i = 0; i < numNames; i++) {
 	fprintf(file, "Name Dictionary \"%s\":\n", doc->getCatalog()->getJSName(i)->c_str());
@@ -131,6 +143,9 @@ void JSInfo::scan(int nPages) {
   scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionPrintDocumentFinish).get(),
                  "After Print Document");
 
+  if (onlyFirstJS && hasJS) {
+    return;
+  }
   // form field actions
   if (doc->getCatalog()->getFormType() == Catalog::AcroForm) {
     Form *form = doc->getCatalog()->getForm();
@@ -148,6 +163,9 @@ void JSInfo::scan(int nPages) {
                        "Validate Field");
 	scanLinkAction(widget->getAdditionalAction(Annot::actionCalculateField).get(),
                        "Calculate Field");
+        if (onlyFirstJS && hasJS) {
+          return;
+        }
       }
     }
   }
@@ -171,12 +189,18 @@ void JSInfo::scan(int nPages) {
     scanLinkAction(page->getAdditionalAction(Page::actionOpenPage).get(), "Page Open");
     scanLinkAction(page->getAdditionalAction(Page::actionClosePage).get(), "Page Close");
 
+    if (onlyFirstJS && hasJS) {
+          return;
+    }
     // annotation actions (links, screen, widget)
     annots = page->getAnnots();
     for (int i = 0; i < annots->getNumAnnots(); ++i) {
       if (annots->getAnnot(i)->getType() == Annot::typeLink) {
 	AnnotLink *annot = static_cast<AnnotLink *>(annots->getAnnot(i));
 	scanLinkAction(annot->getAction(), "Link Annotation Activated");
+	if (onlyFirstJS && hasJS) {
+	  return;
+	}
       } else if (annots->getAnnot(i)->getType() == Annot::typeScreen) {
 	AnnotScreen *annot = static_cast<AnnotScreen *>(annots->getAnnot(i));
 	scanLinkAction(annot->getAction(),
@@ -202,6 +226,9 @@ void JSInfo::scan(int nPages) {
 	scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible).get(),
                        "Screen Annotation Page Invisible");
 
+	if (onlyFirstJS && hasJS) {
+	  return;
+	}
       } else if (annots->getAnnot(i)->getType() == Annot::typeWidget) {
 	AnnotWidget *annot = static_cast<AnnotWidget *>(annots->getAnnot(i));
 	scanLinkAction(annot->getAction(),
@@ -226,6 +253,9 @@ void JSInfo::scan(int nPages) {
                        "Widget Annotation Page Visible");
 	scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible).get(),
                        "Widget Annotation Page Invisible");
+	if (onlyFirstJS && hasJS) {
+	  return;
+	}
       }
     }
   }
diff --git a/utils/JSInfo.h b/poppler/JSInfo.h
similarity index 89%
rename from utils/JSInfo.h
rename to poppler/JSInfo.h
index 93c50055..89e0a3d0 100644
--- a/utils/JSInfo.h
+++ b/poppler/JSInfo.h
@@ -41,6 +41,9 @@ public:
   // scan and print JS in the PDF
   void scanJS(int nPages, FILE *fout, const UnicodeMap *uMap);
 
+  // scan without printing; when stopOnFirstJS is true, stop at the first JS found
+  void scanJS(int nPages, bool stopOnFirstJS);
+
   // return true if PDF contains JavaScript
   bool containsJS();
 
@@ -52,6 +55,7 @@ private:
   bool print;
   FILE *file;
   const UnicodeMap *uniMap;
+  bool onlyFirstJS; /* stop scanning after finding first JS */
 
   void scan(int nPages);
   void scanLinkAction(LinkAction *link, const char *action);
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 4b5621fc..6ad884cf 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -86,6 +86,7 @@
 #include "PDFDoc.h"
 #include "Hints.h"
 #include "UTF.h"
+#include "JSInfo.h"
 
 //------------------------------------------------------------------------
 
@@ -2152,3 +2153,9 @@ Page *PDFDoc::getPage(int page)
 
   return catalog->getPage(page);
 }
+
+bool PDFDoc::hasJavascript() {
+  JSInfo jsInfo(this);
+  jsInfo.scanJS (getNumPages(), true);
+  return jsInfo.containsJS();
+}
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 7f4fb167..ca8bf6b6 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -349,6 +349,8 @@ public:
                                      Goffset uxrefOffset, OutStream* outStr, XRef *xRef);
   static void writeXRefStreamTrailer (Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef,
                                       Goffset uxrefOffset, OutStream* outStr, XRef *xRef);
+  // scans the PDF and returns true if it contains any JavaScript
+  bool hasJavascript();
 
 private:
   // insert referenced objects in XRef
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index c3df0c3f..3017365b 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -84,8 +84,6 @@ set(pdfimages_SOURCES ${common_srcs}
   pdfimages.cc
   ImageOutputDev.cc
   ImageOutputDev.h
-  JSInfo.cc
-  JSInfo.h
 )
 add_executable(pdfimages ${pdfimages_SOURCES})
 target_link_libraries(pdfimages ${common_libs})
@@ -95,8 +93,6 @@ install(FILES pdfimages.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
 # pdfinfo
 set(pdfinfo_SOURCES ${common_srcs}
   pdfinfo.cc printencodings.cc
-  JSInfo.cc
-  JSInfo.h
 )
 add_executable(pdfinfo ${pdfinfo_SOURCES})
 target_link_libraries(pdfinfo ${common_libs})


More information about the poppler mailing list