[poppler] utils/HtmlFonts.cc utils/HtmlOutputDev.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Sat Jun 18 05:42:03 PDT 2011


 utils/HtmlFonts.cc     |    3 ++-
 utils/HtmlOutputDev.cc |   21 +++++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

New commits:
commit d4af1c4ef46abf1f11b7215c7b144ce7bb7912eb
Author: Joshua Richardson <joshuarbox-junk1 at yahoo.com>
Date:   Sat Jun 18 13:39:54 2011 +0100

    Fix vertical spacing issues in pdftohtml output.
    
    Bug 38019

diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
index e2839e3..2ae9222 100644
--- a/utils/HtmlFonts.cc
+++ b/utils/HtmlFonts.cc
@@ -21,6 +21,7 @@
 // Copyright (C) 2008 Boris Toloknov <tlknv at yandex.ru>
 // Copyright (C) 2008 Tomas Are Haavet <tomasare at gmail.com>
 // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
+// Copyright (C) 2011 Joshua Richardson <joshuarbox-junk1 at yahoo.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -331,7 +332,7 @@ GooString* HtmlFontAccu::CSStyle(int i, int j){
      tmp->append(iStr);
      tmp->append("{font-size:");
      tmp->append(Size);
-     if( font.getLineSize() != -1 )
+     if( font.getLineSize() != -1 && font.getLineSize() != 0 )
      {
 	 lSize = GooString::fromInt(font.getLineSize());
 	 tmp->append("px;line-height:");
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index dd84354..a71bb96 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -28,6 +28,7 @@
 // Copyright (C) 2010 Adrian Johnson <ajohnson at redneon.com>
 // Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
+// Copyright (C) 2011 Joshua Richardson <joshuarbox-junk1 at yahoo.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -61,6 +62,9 @@
 #include "HtmlOutputDev.h"
 #include "HtmlFonts.h"
 
+// Returns true if x is strictly closer to y than to z, i.e. |x - y| < |x - z|; a tie returns false.
+static inline bool IS_CLOSER(float x, float y, float z) { return fabs((x)-(y)) < fabs((x)-(z)); }
+
 int HtmlPage::pgNum=0;
 int HtmlOutputDev::imgNum=1;
 GooList *HtmlOutputDev::imgList=new GooList();
@@ -400,6 +404,9 @@ static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic,
     htext->append("</a>");
 }
 
+// Strings are lines of text;
+// This function aims to combine strings into lines and paragraphs if !noMerge
+// It may also strip out duplicate strings (if they are on top of each other); such duplicates are sometimes used to create a font effect
 void HtmlPage::coalesce() {
   HtmlString *str1, *str2;
   HtmlFont *hfont1, *hfont2;
@@ -478,9 +485,10 @@ void HtmlPage::coalesce() {
 
   while (str1 && (str2 = str1->yxNext)) {
     hfont2 = getFont(str2);
-    space = str1->yMax - str1->yMin;
+    space = str1->yMax - str1->yMin; // the height of the font's bounding box
     horSpace = str2->xMin - str1->xMax;
-    addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4);
+    // if strings line up on left-hand side AND they are on subsequent lines, we need a line break
+    addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax);
     vertSpace = str2->yMin - str1->yMax;
 
 //printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);
@@ -497,6 +505,15 @@ void HtmlPage::coalesce() {
     	vertOverlap = 0;
     } 
     
+    // Combine strings if:
+    //  They appear to be the same font (complex mode only) AND are going in the same direction AND at least one of the following holds:
+    //  1.  They appear to be part of the same line of text
+    //  2.  They appear to be subsequent lines of a paragraph
+    //  We assume (1) or (2) above, respectively, based on:
+    //  (1)  strings overlap vertically AND
+    //       horizontal space between end of str1 and start of str2 is consistent with a single space or less;
+    //       when rawOrder, the strings have to overlap vertically by at least 50%
+    //  (2)  Strings flow down the page, but the space between them is not too great, and they are lined up on the left
     if (
 	(
 	 (


More information about the poppler mailing list