[poppler] utils/HtmlOutputDev.cc

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue May 31 21:21:18 UTC 2022


 utils/HtmlOutputDev.cc |   50 ++++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

New commits:
commit ae13fd1f561125be152f3249ca87c8259b22ca6a
Author: Brian Rosenfield <brosenfi at yahoo.com>
Date:   Tue May 31 21:21:16 2022 +0000

    Fix type 3 font size initialization in pdftohtml using font bounding box

diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 0fb83ba7..e0b25e09 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -177,7 +177,7 @@ HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu *_fonts) :
         yMax = y - descent * fontSize;
         GfxRGB rgb;
         state->getFillRGB(&rgb);
-        HtmlFont hfont = HtmlFont(*font, static_cast<int>(fontSize), rgb, state->getFillOpacity());
+        HtmlFont hfont = HtmlFont(*font, std::lround(fontSize), rgb, state->getFillOpacity());
         if (isMatRotOrSkew(state->getTextMat())) {
             double normalizedMatrix[4];
             memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
@@ -297,33 +297,41 @@ void HtmlPage::updateFont(GfxState *state)
 {
     const char *name;
     int code;
-    double w;
+    double dimLength;
 
     // adjust the font size
     fontSize = state->getTransformedFontSize();
     const GfxFont *const font = state->getFont().get();
     if (font && font->getType() == fontType3) {
-        // This is a hack which makes it possible to deal with some Type 3
-        // fonts.  The problem is that it's impossible to know what the
-        // base coordinate system used in the font is without actually
-        // rendering the font.  This code tries to guess by looking at the
-        // width of the character 'm' (which breaks if the font is a
-        // subset that doesn't contain 'm').
-        for (code = 0; code < 256; ++code) {
-            if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') {
-                break;
+        // Grab the font size from the font bounding box if possible - remember to
+        // scale from the glyph coordinate system.
+        const double *fontBBox = font->getFontBBox();
+        const double *fontMat = font->getFontMatrix();
+        dimLength = (fontBBox[3] - fontBBox[1]) * fontMat[3];
+        if (dimLength > 0) {
+            fontSize *= dimLength;
+        } else {
+            // This is a hack which makes it possible to deal with some Type 3
+            // fonts.  The problem is that it's impossible to know what the
+            // base coordinate system used in the font is without actually
+            // rendering the font.  This code tries to guess by looking at the
+            // width of the character 'm' (which breaks if the font is a
+            // subset that doesn't contain 'm').
+            for (code = 0; code < 256; ++code) {
+                if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') {
+                    break;
+                }
             }
-        }
-        if (code < 256) {
-            w = ((Gfx8BitFont *)font)->getWidth(code);
-            if (w != 0) {
-                // 600 is a generic average 'm' width -- yes, this is a hack
-                fontSize *= w / 0.6;
+            if (code < 256) {
+                dimLength = ((Gfx8BitFont *)font)->getWidth(code);
+                if (dimLength != 0) {
+                    // 600 is a generic average 'm' width -- yes, this is a hack
+                    fontSize *= dimLength / 0.6;
+                }
+            }
+            if (fontMat[0] != 0) {
+                fontSize *= fabs(fontMat[3] / fontMat[0]);
             }
-        }
-        const double *fm = font->getFontMatrix();
-        if (fm[0] != 0) {
-            fontSize *= fabs(fm[3] / fm[0]);
         }
     }
 }


More information about the poppler mailing list