[poppler] utils/HtmlFonts.cc utils/HtmlFonts.h utils/HtmlOutputDev.cc utils/HtmlOutputDev.h utils/HtmlUtils.h
Albert Astals Cid
aacid at kemper.freedesktop.org
Thu Aug 18 09:49:26 PDT 2011
utils/HtmlFonts.cc | 64 ++++++++++--------
utils/HtmlFonts.h | 10 ++
utils/HtmlOutputDev.cc | 170 ++++++++++++++++++++++++++++++++++---------------
utils/HtmlOutputDev.h | 8 +-
utils/HtmlUtils.h | 51 ++++++++++++++
5 files changed, 218 insertions(+), 85 deletions(-)
New commits:
commit 86271e4810f714d4ba7a2a6651a9b1d04f653262
Author: Joshua Richardson <jric at chegg.com>
Date: Thu Aug 18 18:48:40 2011 +0200
pdftohtml: Support text rotation
Includes a few other fixlets.
See bug 38586 for more info
diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
index 2ae9222..c9b558e 100644
--- a/utils/HtmlFonts.cc
+++ b/utils/HtmlFonts.cc
@@ -21,7 +21,8 @@
// Copyright (C) 2008 Boris Toloknov <tlknv at yandex.ru>
// Copyright (C) 2008 Tomas Are Haavet <tomasare at gmail.com>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
-// Copyright (C) 2011 Joshua Richardson <joshuarbox-junk1 at yahoo.com>
+// Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
+// Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -29,6 +30,7 @@
//========================================================================
#include "HtmlFonts.h"
+#include "HtmlUtils.h"
#include "GlobalParams.h"
#include "UnicodeMap.h"
#include <stdio.h>
@@ -120,6 +122,7 @@ HtmlFont::HtmlFont(GooString* ftname,int _size, GfxRGB rgb){
size=(_size-1);
italic = gFalse;
bold = gFalse;
+ rotOrSkewed = gFalse;
if (fontname){
if (strstr(fontname->lowerCase()->getCString(),"bold")) bold=gTrue;
@@ -148,6 +151,8 @@ HtmlFont::HtmlFont(const HtmlFont& x){
pos=x.pos;
color=x.color;
if (x.FontName) FontName=new GooString(x.FontName);
+ rotOrSkewed = x.rotOrSkewed;
+ memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
}
@@ -176,14 +181,15 @@ void HtmlFont::clear(){
/*
- This function is used to compare font uniquily for insertion into
+ This function is used to compare font uniquely for insertion into
the list of all encountered fonts
*/
GBool HtmlFont::isEqual(const HtmlFont& x) const{
- return ((size==x.size) &&
+ return (size==x.size) &&
(lineSize==x.lineSize) &&
(pos==x.pos) && (bold==x.bold) && (italic==x.italic) &&
- (color.isEqual(x.getColor())));
+ (color.isEqual(x.getColor())) && isRotOrSkewed() == x.isRotOrSkewed() &&
+ (!isRotOrSkewed() || rot_matrices_equal(getRotMat(), x.getRotMat()));
}
/*
@@ -232,11 +238,11 @@ GooString* HtmlFont::HtmlFilter(Unicode* u, int uLen) {
for (int i = 0; i < uLen; ++i) {
switch (u[i])
{
- case '"': tmp->append("&quot;"); break;
+ case '"': tmp->append("&#34;"); break;
case '&': tmp->append("&amp;"); break;
case '<': tmp->append("&lt;"); break;
case '>': tmp->append("&gt;"); break;
- case ' ': tmp->append( !xml && ( i+1 >= uLen || !tmp->getLength() || tmp->getChar( tmp->getLength()-1 ) == ' ' ) ? "&nbsp;" : " " );
+ case ' ': tmp->append( !xml && ( i+1 >= uLen || !tmp->getLength() || tmp->getChar( tmp->getLength()-1 ) == ' ' ) ? "&#160;" : " " );
break;
default:
{
@@ -289,29 +295,6 @@ int HtmlFontAccu::AddFont(const HtmlFont& font){
return (accu->size()-1);
}
-// get CSS font name for font #i
-GooString* HtmlFontAccu::getCSStyle(int i, GooString* content, int j){
- GooString *tmp;
- GooString *iStr=GooString::fromInt(i);
- GooString *jStr=GooString::fromInt(j);
-
- if (!xml) {
- tmp = new GooString("<span class=\"ft");
- tmp->append(jStr);
- tmp->append(iStr);
- tmp->append("\">");
- tmp->append(content);
- tmp->append("</span>");
- } else {
- tmp = new GooString("");
- tmp->append(content);
- }
-
- delete jStr;
- delete iStr;
- return tmp;
-}
-
// get CSS font definition for font #i
GooString* HtmlFontAccu::CSStyle(int i, int j){
GooString *tmp=new GooString();
@@ -343,6 +326,29 @@ GooString* HtmlFontAccu::CSStyle(int i, int j){
tmp->append(fontName); //font.getFontName());
tmp->append(";color:");
tmp->append(colorStr);
+ // if there is rotation or skew, include the matrix
+ if (font.isRotOrSkewed()) {
+ const double * const text_mat = font.getRotMat();
+ GooString matrix_str(" matrix(");
+ matrix_str.appendf("{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)",
+ text_mat[0], text_mat[1], text_mat[2], text_mat[3]);
+ tmp->append(";-moz-transform:");
+ tmp->append(&matrix_str);
+ tmp->append(";-webkit-transform:");
+ tmp->append(&matrix_str);
+ tmp->append(";-o-transform:");
+ tmp->append(&matrix_str);
+ tmp->append(";-ms-transform:");
+ tmp->append(&matrix_str);
+ // Todo: 75% is a wild guess that seems to work pretty well;
+ // We probably need to calculate the real percentage
+ // Based on the characteristic baseline and bounding box of current font
+ // PDF origin is at baseline
+ tmp->append(";-moz-transform-origin: left 75%");
+ tmp->append(";-webkit-transform-origin: left 75%");
+ tmp->append(";-o-transform-origin: left 75%");
+ tmp->append(";-ms-transform-origin: left 75%");
+ }
tmp->append(";}");
}
if (xml) {
diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h
index 2cdea4b..3e3b028 100644
--- a/utils/HtmlFonts.h
+++ b/utils/HtmlFonts.h
@@ -20,6 +20,7 @@
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
// Copyright (C) 2010 Albert Astals Cid <aacid at kde.org>
// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch at cl.cam.ac.uk>
+// Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -62,13 +63,15 @@ class HtmlFont{
int lineSize;
GBool italic;
GBool bold;
+ GBool rotOrSkewed;
int pos; // position of the font name in the fonts array
static GooString *DefaultFont;
GooString *FontName;
HtmlFontColor color;
+ double rotSkewMat[4]; // only four values needed for rotation and skew
public:
- HtmlFont(){FontName=NULL;};
+ HtmlFont(){FontName=NULL; rotOrSkewed = gFalse;}
HtmlFont(GooString* fontname,int _size, GfxRGB rgb);
HtmlFont(const HtmlFont& x);
HtmlFont& operator=(const HtmlFont& x);
@@ -78,9 +81,13 @@ public:
GooString* getFullName();
GBool isItalic() const {return italic;}
GBool isBold() const {return bold;}
+ GBool isRotOrSkewed() const { return rotOrSkewed; }
unsigned int getSize() const {return size;}
int getLineSize() const {return lineSize;}
void setLineSize(int _lineSize) { lineSize = _lineSize; }
+ void setRotMat(const double * const mat)
+ { rotOrSkewed = gTrue; memcpy(rotSkewMat, mat, sizeof(rotSkewMat)); }
+ const double *getRotMat() const { return rotSkewMat; }
GooString* getFontName();
static GooString* getDefaultFont();
static void setDefaultFont(GooString* defaultFont);
@@ -102,7 +109,6 @@ public:
HtmlFont *Get(int i){
return &(*accu)[i];
}
- GooString* getCSStyle (int i,GooString* content, int j = 0);
GooString* CSStyle(int i, int j = 0);
int size() const {return accu->size();}
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index a71bb96..615cf5f 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -28,7 +28,8 @@
// Copyright (C) 2010 Adrian Johnson <ajohnson at redneon.com>
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
-// Copyright (C) 2011 Joshua Richardson <joshuarbox-junk1 at yahoo.com>
+// Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
+// Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -46,6 +47,7 @@
#include <stddef.h>
#include <ctype.h>
#include <math.h>
+#include <iostream>
#include "goo/GooString.h"
#include "goo/GooList.h"
#include "UnicodeMap.h"
@@ -61,6 +63,9 @@
#include "GlobalParams.h"
#include "HtmlOutputDev.h"
#include "HtmlFonts.h"
+#include "HtmlUtils.h"
+
+#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
// returns true if x is closer to y than x is to z
static inline bool IS_CLOSER(float x, float y, float z) { return fabs((x)-(y)) < fabs((x)-(z)); }
@@ -80,6 +85,9 @@ extern GBool xml;
extern GBool showHidden;
extern GBool noMerge;
+static GBool debug = gFalse;
+static GooString *gstr_buff0 = NULL; // a workspace in which I format strings
+
static GooString* basename(GooString* str){
char *p=str->getCString();
@@ -102,11 +110,37 @@ static GooString* Dirname(GooString* str){
}
#endif
+static const char *print_matrix(const double *mat) {
+ delete gstr_buff0;
+
+ gstr_buff0 = GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]",
+ *mat, mat[1], mat[2], mat[3], mat[4], mat[5]);
+ return gstr_buff0->getCString();
+}
+
+static const char *print_uni_str(const Unicode *u, const unsigned uLen) {
+ GooString *gstr_buff1 = NULL;
+
+ delete gstr_buff0;
+
+ if (!uLen) return "";
+ gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?'));
+ for (unsigned i = 1; i < uLen; i++) {
+ if (u[i] < 0x7F) {
+ gstr_buff1 = gstr_buff0->append(u[i] < 0x7F ? static_cast<char>(u[i]) & 0xFF : '?');
+ delete gstr_buff0;
+ gstr_buff0 = gstr_buff1;
+ }
+ }
+
+ return gstr_buff0->getCString();
+}
+
//------------------------------------------------------------------------
// HtmlString
//------------------------------------------------------------------------
-HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts) {
+HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* _fonts) : fonts(_fonts) {
GfxFont *font;
double x, y;
@@ -129,6 +163,22 @@ HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts) {
GooString *name = state->getFont()->getName();
if (!name) name = HtmlFont::getDefaultFont(); //new GooString("default");
HtmlFont hfont=HtmlFont(name, static_cast<int>(fontSize-1), rgb);
+ if (isMatRotOrSkew(state->getTextMat())) {
+ double normalizedMatrix[4];
+ memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
+ // browser rotates the opposite way
+ // so flip the sign of the angle -> sin() components change sign
+ if (debug)
+ std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix) << std::endl;
+ normalizedMatrix[1] *= -1;
+ normalizedMatrix[2] *= -1;
+ if (debug)
+ std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix) << std::endl;
+ normalizeRotMat(normalizedMatrix);
+ if (debug)
+ std::cerr << DEBUG << "after norm: " << print_matrix(normalizedMatrix) << std::endl;
+ hfont.setRotMat(normalizedMatrix);
+ }
fontpos = fonts->AddFont(hfont);
} else {
// this means that the PDF file draws text without a current font,
@@ -301,9 +351,27 @@ void HtmlPage::addChar(GfxState *state, double x, double y,
// and is not too far away from it before adding
//if ((UnicodeMap::getDirection(u[0]) != curStr->dir) ||
// XXX
- if (
- (n > 0 &&
- fabs(x1 - curStr->xRight[n-1]) > 0.1 * (curStr->yMax - curStr->yMin))) {
+ if (debug) {
+ double *text_mat = state->getTextMat();
+ // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
+ // sin q is zero iff there is no rotation, or 180 deg. rotation;
+ // for 180 rotation, cos q will be negative
+ if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) {
+ std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen) << '"' << std::endl;
+ std::cerr << "text " << print_matrix(state->getTextMat());
+ }
+ }
+ if (n > 0 && // don't start a new string, unless there is already a string
+ // TODO: the following line assumes that text is flowing left to
+ // right, which will not necessarily be the case, e.g. if rotated;
+ // It assesses whether or not two characters are close enough to
+ // be part of the same string
+ fabs(x1 - curStr->xRight[n-1]) > 0.1 * (curStr->yMax - curStr->yMin) &&
+ // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
+ // sin q is zero iff there is no rotation, or 180 deg. rotation;
+ // for 180 rotation, cos q will be negative
+ !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat()))
+ {
endString();
beginString(state, NULL);
}
@@ -546,13 +614,13 @@ void HtmlPage::coalesce() {
str1->size * sizeof(double));
if (addSpace) {
str1->text[str1->len] = 0x20;
- str1->htext->append(xml?" ":"&nbsp;");
+ str1->htext->append(xml?" ":"&#160;");
str1->xRight[str1->len] = str2->xMin;
++str1->len;
}
if (addLineBreak) {
str1->text[str1->len] = '\n';
- str1->htext->append("<br>");
+ str1->htext->append("<br/>");
str1->xRight[str1->len] = str2->xMin;
++str1->len;
str1->yMin = str2->yMin;
@@ -660,31 +728,22 @@ void HtmlPage::dumpAsXML(FILE* f,int page){
delete fontCSStyle;
}
- GooString *str, *str1 = NULL;
for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
if (tmp->htext){
- str=new GooString(tmp->htext);
fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
fprintf(f,"font=\"%d\">", tmp->fontpos);
- str1=fonts->getCSStyle(tmp->fontpos, str);
- fputs(str1->getCString(),f);
- delete str;
- delete str1;
+ fputs(tmp->htext->getCString(),f);
fputs("</text>\n",f);
}
}
fputs("</page>\n",f);
}
-
-void HtmlPage::dumpComplex(FILE *file, int page){
- FILE* pageFile;
+int HtmlPage::dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page) {
GooString* tmp;
char* htmlEncoding;
- if( firstPage == -1 ) firstPage = page;
-
if( !noframes )
{
GooString* pgNum=GooString::fromInt(page);
@@ -700,22 +759,21 @@ void HtmlPage::dumpComplex(FILE *file, int page){
if (!pageFile) {
error(-1, "Couldn't open html file '%s'", tmp->getCString());
delete tmp;
- return;
+ return 1;
}
if (!singleHtml)
- fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page);
+ fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page);
else
- fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString());
+ fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString());
delete tmp;
- htmlEncoding = HtmlOutputDev::mapEncodingToHtml
- (globalParams->getTextEncodingName());
+ htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
if (!singleHtml)
- fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+ fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding);
else
- fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n <br>\n", htmlEncoding);
+ fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding);
}
else
{
@@ -724,12 +782,21 @@ void HtmlPage::dumpComplex(FILE *file, int page){
fprintf(pageFile,"<a name=\"%d\"></a>\n", page);
}
- fprintf(pageFile,"<DIV style=\"position:relative;width:%d;height:%d;\">\n",
- pageWidth, pageHeight);
+ return 0;
+}
+
+void HtmlPage::dumpComplex(FILE *file, int page){
+ FILE* pageFile;
+ GooString* tmp;
+
+ if( firstPage == -1 ) firstPage = page;
+
+ if (dumpComplexHeaders(file, pageFile, page)) { error(-1, "Couldn't write headers."); return; }
tmp=basename(DocName);
fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
+ fputs("\tp {margin: 0; padding: 0;}",pageFile);
for(int i=fontsPageMarker;i!=fonts->size();i++) {
GooString *fontCSStyle;
if (!singleHtml)
@@ -747,33 +814,33 @@ void HtmlPage::dumpComplex(FILE *file, int page){
fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile);
}
+ fprintf(pageFile,"<DIV id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n",
+ page, pageWidth, pageHeight);
+
if( !ignore )
{
fprintf(pageFile,
- "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\">\n",
+ "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\"/>\n",
pageWidth, pageHeight, tmp->getCString(),
(page-firstPage+1), imgExt->getCString());
}
delete tmp;
- GooString *str, *str1 = NULL;
for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
if (tmp1->htext){
- str=new GooString(tmp1->htext);
fprintf(pageFile,
- "<DIV style=\"position:absolute;top:%d;left:%d\">",
+ "<P style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft",
xoutRound(tmp1->yMin),
xoutRound(tmp1->xMin));
- fputs("<nobr>",pageFile);
- if (!singleHtml)
- str1=fonts->getCSStyle(tmp1->fontpos, str);
- else
- str1=fonts->getCSStyle(tmp1->fontpos, str, page);
- fputs(str1->getCString(),pageFile);
- delete str;
- delete str1;
- fputs("</nobr></DIV>\n",pageFile);
+ if (!singleHtml) {
+ fputc('0', pageFile);
+ } else {
+ fprintf(pageFile, "%d", page);
+ }
+ fprintf(pageFile,"%d\">", tmp1->fontpos);
+ fputs(tmp1->htext->getCString(), pageFile);
+ fputs("</P>\n", pageFile);
}
}
@@ -801,7 +868,7 @@ void HtmlPage::dump(FILE *f, int pageNum)
int listlen=HtmlOutputDev::imgList->getLength();
for (int i = 0; i < listlen; i++) {
GooString *fName= (GooString *)HtmlOutputDev::imgList->del(0);
- fprintf(f,"<IMG src=\"%s\"><br>\n",fName->getCString());
+ fprintf(f,"<IMG src=\"%s\"/><br/>\n",fName->getCString());
delete fName;
}
HtmlOutputDev::imgNum=1;
@@ -812,7 +879,7 @@ void HtmlPage::dump(FILE *f, int pageNum)
str=new GooString(tmp->htext);
fputs(str->getCString(),f);
delete str;
- fputs("<br>\n",f);
+ fputs("<br/>\n",f);
}
}
fputs("<hr>\n",f);
@@ -879,7 +946,7 @@ GooString* HtmlMetaVar::toString()
result->append(name);
result->append("\" content=\"");
result->append(content);
- result->append("\">");
+ result->append("\"/>");
return result;
}
@@ -920,12 +987,12 @@ void HtmlOutputDev::doFrame(int firstPage){
delete fName;
fName=basename(Docname);
- fputs(DOCTYPE_FRAMES, fContentsFrame);
+ fputs(DOCTYPE, fContentsFrame);
fputs("\n<HTML>",fContentsFrame);
fputs("\n<HEAD>",fContentsFrame);
fprintf(fContentsFrame,"\n<TITLE>%s</TITLE>",docTitle->getCString());
htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
- fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+ fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding);
dumpMetaVars(fContentsFrame);
fprintf(fContentsFrame, "</HEAD>\n");
fputs("<FRAMESET cols=\"100,*\">\n",fContentsFrame);
@@ -995,12 +1062,12 @@ HtmlOutputDev::HtmlOutputDev(char *fileName, char *title,
}
delete left;
fputs(DOCTYPE, fContentsFrame);
- fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame);
+ fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n", fContentsFrame);
if (doOutline)
{
GooString *str = basename(Docname);
- fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
+ fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br/>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
delete str;
}
}
@@ -1044,10 +1111,9 @@ HtmlOutputDev::HtmlOutputDev(char *fileName, char *title,
}
else
{
- fprintf(page,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n",
- DOCTYPE, docTitle->getCString());
+ fprintf(page,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n", DOCTYPE, docTitle->getCString());
- fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+ fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding);
dumpMetaVars(page);
fprintf(page,"</HEAD>\n");
@@ -1113,7 +1179,7 @@ void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
fprintf(fContentsFrame,"<A href=\"%s-%d.html\"",str->getCString(),pageNum);
else
fprintf(fContentsFrame,"<A href=\"%ss.html#%d\"",str->getCString(),pageNum);
- fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br>\n",pageNum);
+ fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br/>\n",pageNum);
}
}
@@ -1520,7 +1586,7 @@ GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
return gFalse;
delete str;
bClose = gTrue;
- fputs("<HTML>\n<HEAD>\n<TITLE>Document Outline</TITLE>\n</HEAD>\n<BODY>\n", output);
+ fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Document Outline</TITLE>\n</HEAD>\n<BODY>\n", output);
}
}
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index c268ce7..474e3af 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -19,6 +19,8 @@
// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc at gnome.org>
// Copyright (C) 2009 Kovid Goyal <kovid at kovidgoyal.net>
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
+// Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
+// Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -52,8 +54,7 @@
#define xoutRound(x) ((int)(x + 0.5))
-#define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">"
-#define DOCTYPE_FRAMES "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\"\n\"http://www.w3.org/TR/html4/frameset.dtd\">"
+#define DOCTYPE "<!DOCTYPE html>"
class GfxState;
class GooString;
@@ -83,6 +84,7 @@ public:
double dx, double dy,
Unicode u);
HtmlLink* getLink() { return link; }
+ const HtmlFont &getFont() const { return *fonts->Get(fontpos); }
void endString(); // postprocessing
private:
@@ -100,6 +102,7 @@ private:
int len; // length of text and xRight
int size; // size of text and xRight arrays
UnicodeTextDirection dir; // direction (left to right/right to left)
+ HtmlFontAccu *fonts;
friend class HtmlPage;
@@ -171,6 +174,7 @@ private:
void setDocName(char* fname);
void dumpAsXML(FILE* f,int page);
void dumpComplex(FILE* f, int page);
+ int dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page);
// marks the position of the fonts that belong to current page (for noframes)
int fontsPageMarker;
diff --git a/utils/HtmlUtils.h b/utils/HtmlUtils.h
new file mode 100644
index 0000000..bdb89b9
--- /dev/null
+++ b/utils/HtmlUtils.h
@@ -0,0 +1,51 @@
+//
+// HtmlUtils.h
+//
+// Created on: Jun 8, 2011
+// Author: Joshua Richardson <jric at chegg.com>
+// Copyright 2011
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef HTMLUTILS_H_
+#define HTMLUTILS_H_
+
+#include <math.h> // fabs
+#include "goo/gtypes.h" // GBool
+
+// Returns true iff the difference between a and b is less than the threshold
+// We always use fuzzy math when comparing decimal numbers due to imprecision
+inline GBool is_within(double a, double thresh, double b) {
+ return fabs(a-b) < thresh;
+}
+
+inline GBool rot_matrices_equal(const double * const mat0, const double * const mat1) {
+ return is_within(mat0[0], .1, mat1[0]) && is_within(mat0[1], .1, mat1[1]) &&
+ is_within(mat0[2], .1, mat1[2]) && is_within(mat0[3], .1, mat1[3]);
+}
+
+// rotation is (cos q, sin q, -sin q, cos q, 0, 0)
+// sin q is zero iff there is no rotation, or 180 deg. rotation;
+// for 180 rotation, cos q will be negative
+inline GBool isMatRotOrSkew(const double * const mat) {
+ return mat[0] < 0 || !is_within(mat[1], .1, 0);
+}
+
+// Alters the matrix so that it does not scale a vector's x component;
+// If the matrix does not skew, then that will also normalize the y
+// component, keeping any rotation, but removing scaling.
+inline void normalizeRotMat(double *mat) {
+ double scale = fabs(mat[0] + mat[1]);
+ if (!scale) return;
+ for (int i = 0; i < 4; i++) mat[i] /= scale;
+}
+
+#endif /* HTMLUTILS_H_ */
More information about the poppler
mailing list