[poppler] utils/HtmlFonts.cc utils/HtmlOutputDev.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Sat Jun 18 05:42:03 PDT 2011
utils/HtmlFonts.cc | 3 ++-
utils/HtmlOutputDev.cc | 21 +++++++++++++++++++--
2 files changed, 21 insertions(+), 3 deletions(-)
New commits:
commit d4af1c4ef46abf1f11b7215c7b144ce7bb7912eb
Author: Joshua Richardson <joshuarbox-junk1 at yahoo.com>
Date: Sat Jun 18 13:39:54 2011 +0100
Fix vertical spacing issues in pdftohtml output.
Bug 38019
diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
index e2839e3..2ae9222 100644
--- a/utils/HtmlFonts.cc
+++ b/utils/HtmlFonts.cc
@@ -21,6 +21,7 @@
// Copyright (C) 2008 Boris Toloknov <tlknv at yandex.ru>
// Copyright (C) 2008 Tomas Are Haavet <tomasare at gmail.com>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
+// Copyright (C) 2011 Joshua Richardson <joshuarbox-junk1 at yahoo.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -331,7 +332,7 @@ GooString* HtmlFontAccu::CSStyle(int i, int j){
tmp->append(iStr);
tmp->append("{font-size:");
tmp->append(Size);
- if( font.getLineSize() != -1 )
+ if( font.getLineSize() != -1 && font.getLineSize() != 0 )
{
lSize = GooString::fromInt(font.getLineSize());
tmp->append("px;line-height:");
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index dd84354..a71bb96 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -28,6 +28,7 @@
// Copyright (C) 2010 Adrian Johnson <ajohnson at redneon.com>
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
+// Copyright (C) 2011 Joshua Richardson <joshuarbox-junk1 at yahoo.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -61,6 +62,9 @@
#include "HtmlOutputDev.h"
#include "HtmlFonts.h"
+// Returns true if x is strictly closer to y than to z, i.e. |x - y| < |x - z|; a tie returns false.
+static inline bool IS_CLOSER(float x, float y, float z) { return fabs((x)-(y)) < fabs((x)-(z)); }
+
int HtmlPage::pgNum=0;
int HtmlOutputDev::imgNum=1;
GooList *HtmlOutputDev::imgList=new GooList();
@@ -400,6 +404,9 @@ static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic,
htext->append("</a>");
}
+// Strings are lines of text;
+// This function aims to combine strings into lines and paragraphs if !noMerge
+// It may also strip out duplicate strings (if they are on top of each other); such duplicates are sometimes used to create a font effect
void HtmlPage::coalesce() {
HtmlString *str1, *str2;
HtmlFont *hfont1, *hfont2;
@@ -478,9 +485,10 @@ void HtmlPage::coalesce() {
while (str1 && (str2 = str1->yxNext)) {
hfont2 = getFont(str2);
- space = str1->yMax - str1->yMin;
+ space = str1->yMax - str1->yMin; // the height of the font's bounding box
horSpace = str2->xMin - str1->xMax;
- addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4);
+ // if strings line up on left-hand side AND they are on subsequent lines, we need a line break
+ addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax);
vertSpace = str2->yMin - str1->yMax;
//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);
@@ -497,6 +505,15 @@ void HtmlPage::coalesce() {
vertOverlap = 0;
}
+ // Combine strings if:
+ // They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following:
+ // 1. They appear to be part of the same line of text
+ // 2. They appear to be subsequent lines of a paragraph
+ // We assume (1) or (2) above, respectively, based on:
+ // (1) strings overlap vertically AND
+ // horizontal space between end of str1 and start of str2 is consistent with a single space or less;
+ // when rawOrder, the strings have to overlap vertically by at least 50%
+ // (2) Strings flow down the page, but the space between them is not too great, and they are lined up on the left
if (
(
(
More information about the poppler
mailing list