[poppler] utils/HtmlOutputDev.cc
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue May 31 21:21:18 UTC 2022
utils/HtmlOutputDev.cc | 50 ++++++++++++++++++++++++++++---------------------
1 file changed, 29 insertions(+), 21 deletions(-)
New commits:
commit ae13fd1f561125be152f3249ca87c8259b22ca6a
Author: Brian Rosenfield <brosenfi at yahoo.com>
Date: Tue May 31 21:21:16 2022 +0000
Fix type 3 font size initialization in pdftohtml using font bounding box
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 0fb83ba7..e0b25e09 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -177,7 +177,7 @@ HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu *_fonts) :
yMax = y - descent * fontSize;
GfxRGB rgb;
state->getFillRGB(&rgb);
- HtmlFont hfont = HtmlFont(*font, static_cast<int>(fontSize), rgb, state->getFillOpacity());
+ HtmlFont hfont = HtmlFont(*font, std::lround(fontSize), rgb, state->getFillOpacity());
if (isMatRotOrSkew(state->getTextMat())) {
double normalizedMatrix[4];
memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
@@ -297,33 +297,41 @@ void HtmlPage::updateFont(GfxState *state)
{
const char *name;
int code;
- double w;
+ double dimLength;
// adjust the font size
fontSize = state->getTransformedFontSize();
const GfxFont *const font = state->getFont().get();
if (font && font->getType() == fontType3) {
- // This is a hack which makes it possible to deal with some Type 3
- // fonts. The problem is that it's impossible to know what the
- // base coordinate system used in the font is without actually
- // rendering the font. This code tries to guess by looking at the
- // width of the character 'm' (which breaks if the font is a
- // subset that doesn't contain 'm').
- for (code = 0; code < 256; ++code) {
- if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') {
- break;
+ // Grab the font size from the font bounding box if possible - remember to
+ // scale from the glyph coordinate system.
+ const double *fontBBox = font->getFontBBox();
+ const double *fontMat = font->getFontMatrix();
+ dimLength = (fontBBox[3] - fontBBox[1]) * fontMat[3];
+ if (dimLength > 0) {
+ fontSize *= dimLength;
+ } else {
+ // This is a hack which makes it possible to deal with some Type 3
+ // fonts. The problem is that it's impossible to know what the
+ // base coordinate system used in the font is without actually
+ // rendering the font. This code tries to guess by looking at the
+ // width of the character 'm' (which breaks if the font is a
+ // subset that doesn't contain 'm').
+ for (code = 0; code < 256; ++code) {
+ if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') {
+ break;
+ }
}
- }
- if (code < 256) {
- w = ((Gfx8BitFont *)font)->getWidth(code);
- if (w != 0) {
- // 600 is a generic average 'm' width -- yes, this is a hack
- fontSize *= w / 0.6;
+ if (code < 256) {
+ dimLength = ((Gfx8BitFont *)font)->getWidth(code);
+ if (dimLength != 0) {
+ // 600 is a generic average 'm' width -- yes, this is a hack
+ fontSize *= dimLength / 0.6;
+ }
+ }
+ if (fontMat[0] != 0) {
+ fontSize *= fabs(fontMat[3] / fontMat[0]);
}
- }
- const double *fm = font->getFontMatrix();
- if (fm[0] != 0) {
- fontSize *= fabs(fm[3] / fm[0]);
}
}
}
More information about the poppler mailing list