[poppler] glib/poppler-document.cc poppler/DateInfo.cc poppler/DateInfo.h qt5/src qt6/src utils/pdfinfo.cc utils/pdftohtml.cc

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Jul 29 22:49:01 UTC 2021


 glib/poppler-document.cc    |    2 -
 poppler/DateInfo.cc         |   19 ++++++++++++---
 poppler/DateInfo.h          |    2 -
 qt5/src/poppler-document.cc |    3 +-
 qt6/src/poppler-document.cc |    3 +-
 utils/pdfinfo.cc            |   55 ++++++++++++++++++++------------------------
 utils/pdftohtml.cc          |    3 --
 7 files changed, 47 insertions(+), 40 deletions(-)

New commits:
commit 2636e51212b99359cc940b806d645a9e43c33d74
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Fri Jul 2 22:09:07 2021 +0930

    Date string may be in unicode

diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index eafe9b63..25cc39b4 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -3459,7 +3459,7 @@ GDateTime *_poppler_convert_pdf_date_to_date_time(const GooString *date)
     int year, mon, day, hour, min, sec, tzHours, tzMins;
     char tz;
 
-    if (parseDateString(date->c_str(), &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
+    if (parseDateString(date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
         if (tz == '+' || tz == '-') {
             gchar *identifier;
 
diff --git a/poppler/DateInfo.cc b/poppler/DateInfo.cc
index 010a0dfb..fef3c05d 100644
--- a/poppler/DateInfo.cc
+++ b/poppler/DateInfo.cc
@@ -26,16 +26,27 @@
 #include <config.h>
 
 #include "glibc.h"
+#include "gmem.h"
 #include "DateInfo.h"
+#include "UTF.h"
 
 #include <cstdio>
 #include <cstring>
 
 /* See PDF Reference 1.3, Section 3.8.2 for PDF Date representation */
-bool parseDateString(const char *dateString, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute)
+bool parseDateString(const GooString *date, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute)
 {
-    if (dateString == nullptr)
-        return false;
+    Unicode *u;
+    int len = TextStringToUCS4(date, &u);
+    GooString s;
+    for (int i = 0; i < len; i++) {
+        // Ignore any non ASCII characters
+        if (u[i] < 128)
+            s.append(u[i]);
+    }
+    gfree(u);
+    const char *dateString = s.c_str();
+
     if (strlen(dateString) < 2)
         return false;
 
@@ -107,7 +118,7 @@ time_t dateStringToTime(const GooString *dateString)
     struct tm tm;
     time_t time;
 
-    if (!parseDateString(dateString->c_str(), &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute))
+    if (!parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute))
         return -1;
 
     tm.tm_year = year - 1900;
diff --git a/poppler/DateInfo.h b/poppler/DateInfo.h
index 640f60d5..2b3f8a69 100644
--- a/poppler/DateInfo.h
+++ b/poppler/DateInfo.h
@@ -28,7 +28,7 @@
 #include "poppler_private_export.h"
 #include <ctime>
 
-bool POPPLER_PRIVATE_EXPORT parseDateString(const char *string, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute);
+bool POPPLER_PRIVATE_EXPORT parseDateString(const GooString *date, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute);
 
 /* Converts the time_t into a PDF Date format string.
  * If timeA is NULL, current time is used.
diff --git a/qt5/src/poppler-document.cc b/qt5/src/poppler-document.cc
index 3460cf53..05fed3d8 100644
--- a/qt5/src/poppler-document.cc
+++ b/qt5/src/poppler-document.cc
@@ -853,7 +853,8 @@ QDateTime convertDate(const char *dateString)
     int year, mon, day, hour, min, sec, tzHours, tzMins;
     char tz;
 
-    if (parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
+    GooString date(dateString);
+    if (parseDateString(&date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
         QDate d(year, mon, day);
         QTime t(hour, min, sec);
         if (d.isValid() && t.isValid()) {
diff --git a/qt6/src/poppler-document.cc b/qt6/src/poppler-document.cc
index 4725c1b7..46cd7aff 100644
--- a/qt6/src/poppler-document.cc
+++ b/qt6/src/poppler-document.cc
@@ -829,7 +829,8 @@ QDateTime convertDate(const char *dateString)
     int year, mon, day, hour, min, sec, tzHours, tzMins;
     char tz;
 
-    if (parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
+    GooString date(dateString);
+    if (parseDateString(&date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
         QDate d(year, mon, day);
         QTime t(hour, min, sec);
         if (d.isValid() && t.isValid()) {
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index cdc88d32..9726c1a9 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -107,30 +107,33 @@ static const ArgDesc argDesc[] = { { "-f", argInt, &firstPage, 0, "first page to
                                    { "-?", argFlag, &printHelp, 0, "print usage information" },
                                    {} };
 
-static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
+static void printTextString(const GooString *s, const UnicodeMap *uMap)
 {
-    const GooString *s1;
     Unicode *u;
     char buf[8];
-    int i, n, len;
+    int len = TextStringToUCS4(s, &u);
+    for (int i = 0; i < len; i++) {
+        int n = uMap->mapUnicode(u[i], buf, sizeof(buf));
+        fwrite(buf, 1, n, stdout);
+    }
+    gfree(u);
+}
+
+static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
+{
+    const GooString *s1;
 
     Object obj = infoDict->lookup(key);
     if (obj.isString()) {
         fputs(text, stdout);
         s1 = obj.getString();
-        len = TextStringToUCS4(s1, &u);
-        for (i = 0; i < len; i++) {
-            n = uMap->mapUnicode(u[i], buf, sizeof(buf));
-            fwrite(buf, 1, n, stdout);
-        }
-        gfree(u);
+        printTextString(s1, uMap);
         fputc('\n', stdout);
     }
 }
 
-static void printInfoDate(Dict *infoDict, const char *key, const char *text)
+static void printInfoDate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
 {
-    const char *s;
     int year, mon, day, hour, min, sec, tz_hour, tz_minute;
     char tz;
     struct tm tmStruct;
@@ -140,7 +143,7 @@ static void printInfoDate(Dict *infoDict, const char *key, const char *text)
     Object obj = infoDict->lookup(key);
     if (obj.isString()) {
         fputs(text, stdout);
-        s = obj.getString()->c_str();
+        const GooString *s = obj.getString();
         // TODO do something with the timezone info
         if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
             tmStruct.tm_year = year - 1900;
@@ -163,25 +166,24 @@ static void printInfoDate(Dict *infoDict, const char *key, const char *text)
                 strftime(buf, sizeof(buf), "%c %Z", &tmStruct);
                 fputs(buf, stdout);
             } else {
-                fputs(s, stdout);
+                printTextString(s, uMap);
             }
         } else {
-            fputs(s, stdout);
+            printTextString(s, uMap);
         }
         fputc('\n', stdout);
     }
 }
 
-static void printISODate(Dict *infoDict, const char *key, const char *text)
+static void printISODate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
 {
-    const char *s;
     int year, mon, day, hour, min, sec, tz_hour, tz_minute;
     char tz;
 
     Object obj = infoDict->lookup(key);
     if (obj.isString()) {
         fputs(text, stdout);
-        s = obj.getString()->c_str();
+        const GooString *s = obj.getString();
         if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
             fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
             if (tz_hour == 0 && tz_minute == 0) {
@@ -192,7 +194,7 @@ static void printISODate(Dict *infoDict, const char *key, const char *text)
                     fprintf(stdout, ":%02d", tz_minute);
             }
         } else {
-            fputs(s, stdout);
+            printTextString(obj.getString(), uMap);
         }
         fputc('\n', stdout);
     }
@@ -389,14 +391,7 @@ static void printDestinations(PDFDoc *doc, const UnicodeMap *uMap)
                     printf("%4d ", i);
                     printLinkDest(it.second);
                     printf(" \"");
-                    Unicode *u;
-                    char buf[8];
-                    const int len = TextStringToUCS4(it.first, &u);
-                    for (int j = 0; j < len; j++) {
-                        const int n = uMap->mapUnicode(u[j], buf, sizeof(buf));
-                        fwrite(buf, 1, n, stdout);
-                    }
-                    gfree(u);
+                    printTextString(it.first, uMap);
                     printf("\"\n");
                     delete it.first;
                 }
@@ -657,14 +652,14 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b
         printInfoString(info.getDict(), "Creator", "Creator:        ", uMap);
         printInfoString(info.getDict(), "Producer", "Producer:       ", uMap);
         if (isoDates) {
-            printISODate(info.getDict(), "CreationDate", "CreationDate:   ");
-            printISODate(info.getDict(), "ModDate", "ModDate:        ");
+            printISODate(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
+            printISODate(info.getDict(), "ModDate", "ModDate:        ", uMap);
         } else if (rawDates) {
             printInfoString(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
             printInfoString(info.getDict(), "ModDate", "ModDate:        ", uMap);
         } else {
-            printInfoDate(info.getDict(), "CreationDate", "CreationDate:   ");
-            printInfoDate(info.getDict(), "ModDate", "ModDate:        ");
+            printInfoDate(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
+            printInfoDate(info.getDict(), "ModDate", "ModDate:        ", uMap);
         }
     }
 
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 9f3fa17a..ebd72c12 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -447,7 +447,6 @@ static GooString *getInfoString(Dict *infoDict, const char *key)
 static GooString *getInfoDate(Dict *infoDict, const char *key)
 {
     Object obj;
-    const char *s;
     int year, mon, day, hour, min, sec, tz_hour, tz_minute;
     char tz;
     struct tm tmStruct;
@@ -456,7 +455,7 @@ static GooString *getInfoDate(Dict *infoDict, const char *key)
 
     obj = infoDict->lookup(key);
     if (obj.isString()) {
-        s = obj.getString()->c_str();
+        const GooString *s = obj.getString();
         // TODO do something with the timezone info
         if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
             tmStruct.tm_year = year - 1900;


More information about the poppler mailing list