[poppler] Branch 'poppler-0.8' - utils/HtmlLinks.cc utils/HtmlOutputDev.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Mon Jul 28 12:02:31 PDT 2008
utils/HtmlLinks.cc | 33 +++++++++++++++++--
utils/HtmlOutputDev.cc | 85 +++++++++++++++++++++++++++++++------------------
2 files changed, 85 insertions(+), 33 deletions(-)
New commits:
commit 633b5524366ad4c1ead961947eba048db1b180dc
Author: Boris Toloknov <tlknv at yandex.ru>
Date: Mon Jul 28 21:02:07 2008 +0200
make xml output valid xml
diff --git a/utils/HtmlLinks.cc b/utils/HtmlLinks.cc
index 3010be5..136e5e1 100644
--- a/utils/HtmlLinks.cc
+++ b/utils/HtmlLinks.cc
@@ -1,5 +1,7 @@
#include "HtmlLinks.h"
+extern GBool xml;
+
HtmlLink::HtmlLink(const HtmlLink& x){
Xmin=x.Xmin;
Ymin=x.Ymin;
@@ -53,9 +55,36 @@ HtmlLink& HtmlLink::operator=(const HtmlLink& x){
return *this;
}
+static GooString* EscapeSpecialChars( GooString* s )
+{
+ GooString* tmp = NULL;
+ for( int i = 0, j = 0; i < s->getLength(); i++, j++ ){
+ const char *replace = NULL;
+ switch ( s->getChar(i) ){
+ case '"': replace = """; break;
+ case '&': replace = "&"; break;
+ case '<': replace = "<"; break;
+ case '>': replace = ">"; break;
+ default: continue;
+ }
+ if( replace ){
+ if( !tmp ) tmp = new GooString( s );
+ if( tmp ){
+ tmp->del( j, 1 );
+ int l = strlen( replace );
+ tmp->insert( j, replace, l );
+ j += l - 1;
+ }
+ }
+ }
+ return tmp ? tmp : s;
+}
+
GooString* HtmlLink::getLinkStart() {
- GooString *res = new GooString("<A href=\"");
- res->append(dest);
+ GooString *res = new GooString("<a href=\"");
+ GooString *d = xml ? EscapeSpecialChars(dest) : dest;
+ res->append( d );
+ if( d != dest ) delete d;
res->append("\">");
return res;
}
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 933070d..fa33143 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -343,6 +343,36 @@ void HtmlPage::endString() {
curStr = NULL;
}
+static const char *strrstr( const char *s, const char *ss )
+{
+ const char *p = strstr( s, ss );
+ for( const char *pp = p; pp != NULL; pp = strstr( p+1, ss ) ){
+ p = pp;
+ }
+ return p;
+}
+
+static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic, GBool &finish_bold )
+{
+ const char *last_italic = finish_italic && ( finish_bold || finish_a ) ? strrstr( htext->getCString(), "<i>" ) : NULL;
+ const char *last_bold = finish_bold && ( finish_italic || finish_a ) ? strrstr( htext->getCString(), "<b>" ) : NULL;
+ const char *last_a = finish_a && ( finish_italic || finish_bold ) ? strrstr( htext->getCString(), "<a " ) : NULL;
+ if( finish_a && ( finish_italic || finish_bold ) && last_a > ( last_italic > last_bold ? last_italic : last_bold ) ){
+ htext->append("</a>", 4);
+ finish_a = false;
+ }
+ if( finish_italic && finish_bold && last_italic > last_bold ){
+ htext->append("</i>", 4);
+ finish_italic = false;
+ }
+ if( finish_bold )
+ htext->append("</b>", 4);
+ if( finish_italic )
+ htext->append("</i>", 4);
+ if( finish_a )
+ htext->append("</a>");
+}
+
void HtmlPage::coalesce() {
HtmlString *str1, *str2;
HtmlFont *hfont1, *hfont2;
@@ -504,28 +534,24 @@ void HtmlPage::coalesce() {
++str1->len;
}
- /* fix <i> and <b> if str1 and str2 differ */
- if( hfont1->isBold() && !hfont2->isBold() )
- str1->htext->append("</b>", 4);
- if( hfont1->isItalic() && !hfont2->isItalic() )
- str1->htext->append("</i>", 4);
- if( !hfont1->isBold() && hfont2->isBold() )
- str1->htext->append("<b>", 3);
- if( !hfont1->isItalic() && hfont2->isItalic() )
- str1->htext->append("<i>", 3);
-
- /* now handle switch of links */
+ /* fix <i>, <b> if str1 and str2 differ and handle switch of links */
HtmlLink *hlink1 = str1->getLink();
HtmlLink *hlink2 = str2->getLink();
- if( !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2) ) {
- if(hlink1 != NULL )
- str1->htext->append("</a>");
- if(hlink2 != NULL ) {
- GooString *ls = hlink2->getLinkStart();
- str1->htext->append(ls);
- delete ls;
- }
+ bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2);
+ GBool finish_a = switch_links && hlink1 != NULL;
+ GBool finish_italic = hfont1->isItalic() && ( !hfont2->isItalic() || finish_a );
+ GBool finish_bold = hfont1->isBold() && ( !hfont2->isBold() || finish_a || finish_italic );
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
+ if( switch_links && hlink2 != NULL ) {
+ GooString *ls = hlink2->getLinkStart();
+ str1->htext->append(ls);
+ delete ls;
}
+ if( ( !hfont1->isItalic() || finish_italic ) && hfont2->isItalic() )
+ str1->htext->append("<i>", 3);
+ if( ( !hfont1->isBold() || finish_bold ) && hfont2->isBold() )
+ str1->htext->append("<b>", 3);
+
str1->htext->append(str2->htext);
// str1 now contains href for link of str2 (if it is defined)
@@ -541,12 +567,10 @@ void HtmlPage::coalesce() {
delete str2;
} else { // keep strings separate
// printf("no\n");
- if( hfont1->isBold() )
- str1->htext->append("</b>",4);
- if( hfont1->isItalic() )
- str1->htext->append("</i>",4);
- if(str1->getLink() != NULL )
- str1->htext->append("</a>");
+ GBool finish_a = str1->getLink() != NULL;
+ GBool finish_bold = hfont1->isBold();
+ GBool finish_italic = hfont1->isItalic();
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
str1->xMin = curX; str1->yMin = curY;
str1 = str2;
@@ -564,12 +588,11 @@ void HtmlPage::coalesce() {
}
}
str1->xMin = curX; str1->yMin = curY;
- if( hfont1->isBold() )
- str1->htext->append("</b>",4);
- if( hfont1->isItalic() )
- str1->htext->append("</i>",4);
- if(str1->getLink() != NULL )
- str1->htext->append("</a>");
+
+ GBool finish_bold = hfont1->isBold();
+ GBool finish_italic = hfont1->isItalic();
+ GBool finish_a = str1->getLink() != NULL;
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
#if 0 //~ for debugging
for (str1 = yxStrings; str1; str1 = str1->yxNext) {
More information about the poppler
mailing list