[Libreoffice-commits] core.git: Branch 'distro/collabora/cp-5.1' - 46 commits - configure.ac download.lst external/icu filter/source hwpfilter/qa hwpfilter/source i18npool/source lotuswordpro/source sc/inc sc/source sd/source svl/source svx/source sw/source vcl/unx writerfilter/source
Andras Timar
andras.timar at collabora.com
Sun Apr 23 18:54:09 UTC 2017
configure.ac | 2
download.lst | 2
external/icu/UnpackedTarball_icu.mk | 5
external/icu/clang-cl.patch.0 | 26
external/icu/icu4c-changeset-39671.patch.1 | 189 +
external/icu/khmerbreakengine.patch | 1110 ++++++++++
external/icu/khmerdict.dict |binary
filter/source/graphicfilter/icgm/cgm.cxx | 8
filter/source/graphicfilter/icgm/class1.cxx | 17
filter/source/graphicfilter/icgm/class4.cxx | 25
filter/source/graphicfilter/itiff/itiff.cxx | 65
filter/source/msfilter/msdffimp.cxx | 48
filter/source/msfilter/svdfppt.cxx | 45
hwpfilter/qa/cppunit/data/fail/cslist-1.hwp |binary
hwpfilter/source/datecode.h | 23
hwpfilter/source/drawing.h | 4
hwpfilter/source/fontmap.cxx | 20
hwpfilter/source/formula.cxx | 9
hwpfilter/source/formula.h | 2
hwpfilter/source/grammar.cxx | 35
hwpfilter/source/hbox.cxx | 37
hwpfilter/source/hbox.h | 101
hwpfilter/source/hcode.cxx | 2
hwpfilter/source/hfont.cxx | 14
hwpfilter/source/hfont.h | 4
hwpfilter/source/hgzip.cxx | 8
hwpfilter/source/hgzip.h | 2
hwpfilter/source/hinfo.cxx | 77
hwpfilter/source/hinfo.h | 6
hwpfilter/source/hiodev.cxx | 50
hwpfilter/source/hiodev.h | 22
hwpfilter/source/hpara.cxx | 139 -
hwpfilter/source/hpara.h | 12
hwpfilter/source/hstyle.cxx | 7
hwpfilter/source/hstyle.h | 2
hwpfilter/source/htags.cxx | 27
hwpfilter/source/htags.h | 6
hwpfilter/source/hwpeq.cxx | 4
hwpfilter/source/hwpfile.cxx | 176 -
hwpfilter/source/hwpfile.h | 31
hwpfilter/source/hwplib.h | 9
hwpfilter/source/hwpread.cxx | 11
hwpfilter/source/hwpreader.cxx | 704 +++---
hwpfilter/source/hwpreader.hxx | 8
hwpfilter/source/lexer.cxx | 11
hwpfilter/source/list.hxx | 146 -
hwpfilter/source/mapping.h | 2
hwpfilter/source/mzstring.cxx | 39
hwpfilter/source/mzstring.h | 6
hwpfilter/source/nodes.h | 2
hwpfilter/source/solver.cxx | 53
i18npool/source/breakiterator/breakiterator_unicode.cxx | 2
lotuswordpro/source/filter/lwpframelayout.cxx | 7
lotuswordpro/source/filter/lwpframelayout.hxx | 1
lotuswordpro/source/filter/lwpgrfobj.cxx | 8
lotuswordpro/source/filter/lwpobjstrm.cxx | 9
lotuswordpro/source/filter/lwpobjstrm.hxx | 1
sc/inc/refdata.hxx | 2
sc/source/core/data/grouptokenconverter.cxx | 4
sc/source/core/tool/interpr4.cxx | 25
sc/source/core/tool/interpr6.cxx | 6
sc/source/core/tool/refdata.cxx | 5
sc/source/core/tool/token.cxx | 29
sc/source/filter/oox/condformatcontext.cxx | 4
sc/source/ui/StatisticsDialogs/RandomNumberGeneratorDialog.cxx | 2
sc/source/ui/docshell/docfunc.cxx | 2
sc/source/ui/undo/undoblk.cxx | 2
sc/source/ui/vba/vbarange.cxx | 16
sc/source/ui/view/preview.cxx | 6
sd/source/filter/ppt/pptin.cxx | 11
sd/source/filter/ppt/pptin.hxx | 10
sd/source/ui/view/drviews3.cxx | 351 +--
svl/source/numbers/zforscan.cxx | 23
svx/source/table/tablelayouter.cxx | 31
sw/source/core/objectpositioning/anchoredobjectposition.cxx | 8
sw/source/filter/ww8/docxattributeoutput.cxx | 7
sw/source/filter/ww8/ww8graf.cxx | 10
sw/source/filter/ww8/ww8par.hxx | 2
sw/source/filter/ww8/ww8par2.cxx | 44
sw/source/filter/ww8/ww8par6.cxx | 2
sw/source/filter/ww8/ww8scan.cxx | 5
sw/source/filter/ww8/ww8struc.hxx | 2
sw/source/uibase/dbui/dbmgr.cxx | 7
vcl/unx/gtk/a11y/atkutil.cxx | 9
writerfilter/source/dmapper/DomainMapper_Impl.cxx | 16
85 files changed, 2612 insertions(+), 1410 deletions(-)
New commits:
commit e98ddef974d860c153958c517fa19b5a03033638
Author: Andras Timar <andras.timar at collabora.com>
Date: Sun Apr 23 15:35:27 2017 +0200
hwpfilter from libreoffice-5-2 (ofz fixes and other fixes)
Change-Id: If40942f38ab3536257d7e58d5630136307930cac
diff --git a/hwpfilter/source/datecode.h b/hwpfilter/source/datecode.h
index 89507b419157..e35b6e50a58f 100644
--- a/hwpfilter/source/datecode.h
+++ b/hwpfilter/source/datecode.h
@@ -24,28 +24,7 @@ static const hchar defaultform[] =
{
'1', 0x9165, 32, '2', 0xB6A9, 32, '3', 0xB7A9, 0
};
-#ifdef _DATECODE_WEEK_DEFINES_
-static const hchar kor_week[] =
-{
- 0xB7A9, 0xB6A9, 0xD1C1, 0xAE81, 0xA1A2, 0x8B71, 0xC9A1
-};
-static const hchar china_week[] =
-{
- 0x4CC8, 0x4BE4, 0x525A, 0x48D8, 0x45AB, 0x4270, 0x50B4
-};
-static const char eng_week[] = { "SunMonTueWedThuFriSat" };
-static const char eng_mon[] = { "JanFebMarAprMayJunJulAugSepOctNovDec" };
-static const char * const en_mon[] =
-{
- "January", "February", "March", "April", "May", "June", "July",
- "August", "September", "October", "November", "December"
-};
-static const char * const en_week[] =
-{
- "Sunday", "Monday", "Tuesday", "Wednesday",
- "Thursday", "Friday", "Saturday"
-};
-#endif //_DATECODE_WEEK_DEFINES_
+
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/fontmap.cxx b/hwpfilter/source/fontmap.cxx
index 259722ee791d..8de154283ba1 100644
--- a/hwpfilter/source/fontmap.cxx
+++ b/hwpfilter/source/fontmap.cxx
@@ -117,8 +117,15 @@ const struct FontEntry FontMapTab[] =
{"\xbd\xc5\xb8\xed \xb1\xc3\xbc\xad",3, 0.97}
};
-#ifndef WIN32
-#if defined(LINUX)
+#if defined(_WIN32)
+const char* RepFontTab[] =
+{
+ "\xb9\xd9\xc5\xc1", /* 0 */
+ "\xb5\xb8\xbf\xf2", /* 1 */
+ "\xb1\xbc\xb8\xb2", /* 2 */
+ "\xb1\xc3\xbc\xad" /* 3 */
+};
+#elif defined(LINUX)
const char* RepFontTab[] =
{
"\xb9\xe9\xb9\xac \xb9\xd9\xc5\xc1", /* 0 */
@@ -135,15 +142,6 @@ const char* RepFontTab[] =
"Gungso" /* 3 */
};
#endif
-#else
-const char* RepFontTab[] =
-{
- "\xb9\xd9\xc5\xc1", /* 0 */
- "\xb5\xb8\xbf\xf2", /* 1 */
- "\xb1\xbc\xb8\xb2", /* 2 */
- "\xb1\xc3\xbc\xad" /* 3 */
-};
-#endif
int getRepFamilyName(const char* orig, char *buf, double &ratio)
{
diff --git a/hwpfilter/source/formula.cxx b/hwpfilter/source/formula.cxx
index d12c6cbce0ab..3b114f783f72 100644
--- a/hwpfilter/source/formula.cxx
+++ b/hwpfilter/source/formula.cxx
@@ -178,9 +178,8 @@ void Formula::makeExpr(Node *res)
break;
case ID_BLOCK:
makeBlock(tmp);
- //fall-through
+ break;
case ID_BEGIN:
- //fall-through
case ID_END:
break;
}
@@ -568,10 +567,10 @@ void Formula::makeBlock(Node *res)
#endif
}
-int Formula::parse()
+void Formula::parse()
{
Node *res = nullptr;
- if( !eq ) return 0;
+ if( !eq ) return;
if( isHwpEQ ){
MzString a;
// fprintf(stderr,"\n\n[BEFORE]\n[%s]\n",eq);
@@ -627,8 +626,6 @@ int Formula::parse()
nodelist.pop_front();
delete tmpNode;
}
-
- return 0;
}
void Formula::trim()
diff --git a/hwpfilter/source/formula.h b/hwpfilter/source/formula.h
index 22c53f6a4d6f..a4cddc2e8c8a 100644
--- a/hwpfilter/source/formula.h
+++ b/hwpfilter/source/formula.h
@@ -55,7 +55,7 @@ public:
pList = p;
rList = static_cast<XAttributeList *>(pList);
}
- int parse();
+ void parse();
private:
void trim();
void makeMathML(Node *res);
diff --git a/hwpfilter/source/grammar.cxx b/hwpfilter/source/grammar.cxx
index 52ce22005c41..14751da6fbf7 100644
--- a/hwpfilter/source/grammar.cxx
+++ b/hwpfilter/source/grammar.cxx
@@ -361,33 +361,6 @@ static const short yycheck[] = { 11,
It was written by Richard Stallman by simplifying the hairy parser
used when %semantic_parser is specified. */
-#ifndef YYSTACK_USE_ALLOCA
-#ifdef alloca
-#define YYSTACK_USE_ALLOCA
-#else /* alloca not defined */
-#ifdef __GNUC__
-#define YYSTACK_USE_ALLOCA
-#define alloca __builtin_alloca
-#else /* not GNU C. */
-#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || (defined (__sun) && defined (__i386))
-#define YYSTACK_USE_ALLOCA
-#include <sal/alloca.h>
-#else /* not sparc */
-#if defined(_AIX)
- #pragma alloca
-#define YYSTACK_USE_ALLOCA
-#endif /* not _AIX */
-#endif /* not sparc */
-#endif /* not GNU C */
-#endif /* alloca not defined */
-#endif /* YYSTACK_USE_ALLOCA not defined */
-
-#ifdef YYSTACK_USE_ALLOCA
-#define YYSTACK_ALLOC alloca
-#else
-#define YYSTACK_ALLOC malloc
-#endif
-
/* Note: there must be only one dollar sign in this file.
It is replaced by the list of actions, each action
as one case of the switch. */
@@ -611,17 +584,15 @@ yynewstate:
yystacksize *= 2;
if (yystacksize > YYMAXDEPTH)
yystacksize = YYMAXDEPTH;
-#ifndef YYSTACK_USE_ALLOCA
yyfree_stacks = 1;
-#endif
- yyss = static_cast<short *>(YYSTACK_ALLOC (yystacksize * sizeof (*yyssp)));
+ yyss = static_cast<short *>(malloc (yystacksize * sizeof (*yyssp)));
memcpy (yyss, yyss1,
size * (unsigned int) sizeof (*yyssp));
- yyvs = static_cast<YYSTYPE *>(YYSTACK_ALLOC (yystacksize * sizeof (*yyvsp)));
+ yyvs = static_cast<YYSTYPE *>(malloc (yystacksize * sizeof (*yyvsp)));
memcpy (yyvs, yyvs1,
size * (unsigned int) sizeof (*yyvsp));
#ifdef YYLSP_NEEDED
- yyls = (YYLTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yylsp));
+ yyls = (YYLTYPE *) malloc (yystacksize * sizeof (*yylsp));
memcpy ((char *)yyls, (char *)yyls1,
size * (unsigned int) sizeof (*yylsp));
#endif
diff --git a/hwpfilter/source/hbox.cxx b/hwpfilter/source/hbox.cxx
index cf027d7091f8..6b39021cf2b4 100644
--- a/hwpfilter/source/hbox.cxx
+++ b/hwpfilter/source/hbox.cxx
@@ -133,9 +133,29 @@ DateCode::DateCode()
{
}
-#define _DATECODE_WEEK_DEFINES_
#include "datecode.h"
+static const hchar kor_week[] =
+{
+ 0xB7A9, 0xB6A9, 0xD1C1, 0xAE81, 0xA1A2, 0x8B71, 0xC9A1
+};
+static const hchar china_week[] =
+{
+ 0x4CC8, 0x4BE4, 0x525A, 0x48D8, 0x45AB, 0x4270, 0x50B4
+};
+static const char eng_week[] = { "SunMonTueWedThuFriSat" };
+static const char eng_mon[] = { "JanFebMarAprMayJunJulAugSepOctNovDec" };
+static const char * const en_mon[] =
+{
+ "January", "February", "March", "April", "May", "June", "July",
+ "August", "September", "October", "November", "December"
+};
+static const char * const en_week[] =
+{
+ "Sunday", "Monday", "Tuesday", "Wednesday",
+ "Thursday", "Friday", "Saturday"
+};
+
hchar_string DateCode::GetString()
{
hchar_string ret;
@@ -324,7 +344,7 @@ TxtBox::TxtBox()
, dummy(0)
, dummy1(0)
, cap_len(0)
- , next(0)
+ , next_box(0)
, dummy2(0)
, reserved1(0)
, cap_pos(0)
@@ -398,12 +418,6 @@ Picture::~Picture()
}
-int Picture::Type()
-{
- return pictype;
-}
-
-
// line(14)
// hidden(15)
Hidden::~Hidden()
diff --git a/hwpfilter/source/hbox.h b/hwpfilter/source/hbox.h
index 87f972cda856..69e14f638e15 100644
--- a/hwpfilter/source/hbox.h
+++ b/hwpfilter/source/hbox.h
@@ -331,7 +331,7 @@ struct TxtBox: public FBox
short dummy1; // to not change structure size */
short cap_len;
- short next;
+ short next_box;
short dummy2; // to not change structure size */
unsigned char reserved1;
/**
@@ -375,11 +375,6 @@ struct TxtBox: public FBox
* @returns Count of cell.
*/
int NCell() { return nCell; }
-/**
- * This is one of table, text-box, equalizer and button
- * @returns Type of this object.
- */
- int Type() { return type; }
virtual bool Read(HWPFile &hwpf) override;
};
@@ -649,7 +644,6 @@ struct Picture: public FBox
Picture();
virtual ~Picture();
- int Type ();
virtual bool Read (HWPFile &hwpf) override;
};
diff --git a/hwpfilter/source/hcode.cxx b/hwpfilter/source/hcode.cxx
index 103b343590aa..271eb9dd4209 100644
--- a/hwpfilter/source/hcode.cxx
+++ b/hwpfilter/source/hcode.cxx
@@ -473,7 +473,7 @@ static hchar lineCharConv(hchar ch)
case 0x3060 + '\'' - 31:
case 0x3060 + '\"' - 31:
ch--;
-
+ SAL_FALLTHROUGH;
case 0x3060 + '\'' - 32:
case 0x3060 + '\"' - 32:
case 0x3060 + '{' - 32:
diff --git a/hwpfilter/source/hfont.cxx b/hwpfilter/source/hfont.cxx
index 72746f23467e..70e394282abc 100644
--- a/hwpfilter/source/hfont.cxx
+++ b/hwpfilter/source/hfont.cxx
@@ -43,18 +43,17 @@ HWPFont::~HWPFont()
}
-int HWPFont::AddFont(int lang, const char *font)
+void HWPFont::AddFont(int lang, const char *font)
{
int nfonts;
if (!(lang >= 0 && lang < NLanguage))
- return 0;
+ return;
nfonts = nFonts[lang];
if (MAXFONTS <= nfonts)
- return 0;
+ return;
strncpy(fontnames[lang] + FONTNAMELEN * nfonts, font, FONTNAMELEN - 1);
nFonts[lang]++;
- return nfonts;
}
@@ -70,7 +69,7 @@ const char *HWPFont::GetFontName(int lang, int id)
static char buffer[FONTNAMELEN];
-bool HWPFont::Read(HWPFile & hwpf)
+void HWPFont::Read(HWPFile & hwpf)
{
int lang = 0;
short nfonts = 0;
@@ -81,7 +80,8 @@ bool HWPFont::Read(HWPFile & hwpf)
hwpf.Read2b(&nfonts, 1);
if (!(nfonts > 0 && nfonts < MAXFONTS))
{
- return !hwpf.SetState(HWP_InvalidFileFormat);
+ (void)hwpf.SetState(HWP_InvalidFileFormat);
+ return;
}
fontnames[lang] = new char[nfonts * FONTNAMELEN];
@@ -92,8 +92,6 @@ bool HWPFont::Read(HWPFile & hwpf)
AddFont(lang, buffer);
}
}
-
- return !hwpf.State();
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/hfont.h b/hwpfilter/source/hfont.h
index 1ed8fa6bb3cd..074346f8f892 100644
--- a/hwpfilter/source/hfont.h
+++ b/hwpfilter/source/hfont.h
@@ -54,7 +54,7 @@ class DLLEXPORT HWPFont
* @param lang Language index
* @param font Name of font family
*/
- int AddFont( int lang, const char *font );
+ void AddFont( int lang, const char *font );
/**
* @param lang Language index
* @param id Index of font
@@ -62,7 +62,7 @@ class DLLEXPORT HWPFont
*/
const char *GetFontName( int lang, int id );
- bool Read( HWPFile &hwpf );
+ void Read( HWPFile &hwpf );
};
#endif /* _HWPFONTS+H_ */
diff --git a/hwpfilter/source/hinfo.cxx b/hwpfilter/source/hinfo.cxx
index 2ca1714045f1..2e5d4b1c9a16 100644
--- a/hwpfilter/source/hinfo.cxx
+++ b/hwpfilter/source/hinfo.cxx
@@ -75,7 +75,7 @@ HWPInfo::~HWPInfo()
* Function for reading document information (128 bytes)
* Document information is the information after the file identification information (30 bytes).
*/
-bool HWPInfo::Read(HWPFile & hwpf)
+void HWPInfo::Read(HWPFile & hwpf)
{
hwpf.Read2b(&cur_col, 1); /* When a document is saving, the paragraph number where the coursor is */
hwpf.Read2b(&cur_row, 1); /* Paragraphs rows */
@@ -86,31 +86,31 @@ bool HWPInfo::Read(HWPFile & hwpf)
// paper geometry information
unsigned short tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.paper_height = tmp16; /* Paper length */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.paper_width = tmp16; /* Sheet width */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.top_margin = tmp16; /* Top margin */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.bottom_margin = tmp16; /* The bottom margin */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.left_margin = tmp16; /* Left Margin */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.right_margin = tmp16; /* Right margins */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.header_length = tmp16; /* Header length */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.footer_length = tmp16; /* Footer length */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
paper.gutter_length = tmp16; /* The binding margin */
hwpf.Read2b(&readonly, 1); /* Reserve */
hwpf.Read1b(reserved1, 4); /* Reserve */
@@ -129,22 +129,22 @@ bool HWPInfo::Read(HWPFile & hwpf)
hwpf.Read2b(&countfn,1); /* Number of footnote */
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
splinetext = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
splinefn = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
spfnfn = tmp16;
hwpf.Read1b(&fnchar, 1);
hwpf.Read1b(&fnlinetype, 1);
// border layout
- for (int ii = 0; ii < 4; ++ii)
+ for (int & ii : bordermargin)
{
if (!hwpf.Read2b(tmp16))
- return false;
- bordermargin[ii] = tmp16;
+ return;
+ ii = tmp16;
}
hwpf.Read2b(&borderline, 1);
@@ -156,17 +156,17 @@ bool HWPInfo::Read(HWPFile & hwpf)
hwpf.Read2b(&info_block_len, 1);
if (hwpf.State())
- return false;
+ return;
/* Read the article summary. */
if (!summary.Read(hwpf))
- return false;
+ return;
if (info_block_len > 0)
{
info_block = new unsigned char[info_block_len + 1];
if (!HWPReadInfoBlock(info_block, info_block_len, hwpf))
- return false;
+ return;
}
/* reset the value of hwpf. */
@@ -174,8 +174,6 @@ bool HWPInfo::Read(HWPFile & hwpf)
hwpf.encrypted = encrypted != 0;
hwpf.info_block_len = info_block_len;
hwpf.SetCompressed(hwpf.compressed);
-
- return (!hwpf.State());
}
@@ -197,64 +195,63 @@ bool HWPSummary::Read(HWPFile & hwpf)
}
-bool ParaShape::Read(HWPFile & hwpf)
+void ParaShape::Read(HWPFile & hwpf)
{
pagebreak = 0;
unsigned short tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
left_margin = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
right_margin = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
indent = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
lspacing = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
pspacing_next = tmp16;
hwpf.Read1b(&condense, 1);
hwpf.Read1b(&arrange_type, 1);
- for (int ii = 0; ii < MAXTABS; ii++)
+ for (TabSet & tab : tabs)
{
- hwpf.Read1b(&tabs[ii].type, 1);
- hwpf.Read1b(&tabs[ii].dot_continue, 1);
+ hwpf.Read1b(&tab.type, 1);
+ hwpf.Read1b(&tab.dot_continue, 1);
if (!hwpf.Read2b(tmp16))
- return false;
- tabs[ii].position = tmp16;
+ return;
+ tab.position = tmp16;
}
hwpf.Read1b(&coldef.ncols, 1);
hwpf.Read1b(&coldef.separator, 1);
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
coldef.spacing = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
coldef.columnlen = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
coldef.columnlen0 = tmp16;
hwpf.Read1b(&shade, 1);
hwpf.Read1b(&outline, 1);
hwpf.Read1b(&outline_continue, 1);
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
pspacing_prev = tmp16;
hwpf.Read1b(reserved, 2);
- return (!hwpf.State());
}
-bool CharShape::Read(HWPFile & hwpf)
+void CharShape::Read(HWPFile & hwpf)
{
unsigned short tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
size = tmp16;
hwpf.Read1b(font, NLanguage);
hwpf.Read1b(ratio, NLanguage);
@@ -263,8 +260,6 @@ bool CharShape::Read(HWPFile & hwpf)
hwpf.Read1b(&shade, 1);
hwpf.Read1b(&attr, 1);
hwpf.Read1b(reserved, 4);
-
- return (!hwpf.State());
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/hinfo.h b/hwpfilter/source/hinfo.h
index 565e6a1cb54f..b080150e30c8 100644
--- a/hwpfilter/source/hinfo.h
+++ b/hwpfilter/source/hinfo.h
@@ -205,7 +205,7 @@ class DLLEXPORT HWPInfo
HWPInfo(void);
~HWPInfo(void);
- bool Read(HWPFile &hwpf);
+ void Read(HWPFile &hwpf);
};
@@ -231,7 +231,7 @@ struct CharShape
unsigned char attr;
unsigned char reserved[4];
- bool Read(HWPFile &);
+ void Read(HWPFile &);
};
/* ?? ?????? ???? ?????? */
@@ -284,7 +284,7 @@ struct ParaShape
CharShape *cshape;
unsigned char pagebreak;
- bool Read(HWPFile &);
+ void Read(HWPFile &);
// virtual ~ParaShape();
};
#endif // INCLUDED_HWPFILTER_SOURCE_HINFO_H
diff --git a/hwpfilter/source/hiodev.cxx b/hwpfilter/source/hiodev.cxx
index 677538809760..6ae96117a25c 100644
--- a/hwpfilter/source/hiodev.cxx
+++ b/hwpfilter/source/hiodev.cxx
@@ -20,7 +20,7 @@
#include <stdio.h>
#include <errno.h>
-#ifdef WIN32
+#ifdef _WIN32
# include <io.h>
#else
# include <unistd.h>
diff --git a/hwpfilter/source/hpara.cxx b/hwpfilter/source/hpara.cxx
index bbac6da6844c..cce909ead16e 100644
--- a/hwpfilter/source/hpara.cxx
+++ b/hwpfilter/source/hpara.cxx
@@ -29,29 +29,29 @@
#include "hbox.h"
#include "hutil.h"
-bool LineInfo::Read(HWPFile & hwpf, HWPPara *pPara)
+void LineInfo::Read(HWPFile & hwpf, HWPPara *pPara)
{
if (!hwpf.Read2b(pos))
- return false;
+ return;
unsigned short tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
space_width = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
height = tmp16;
// internal information
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
pgy = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
sx = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
psx = tmp16;
if (!hwpf.Read2b(tmp16))
- return false;
+ return;
pex = tmp16;
height_sp = 0;
@@ -62,8 +62,6 @@ bool LineInfo::Read(HWPFile & hwpf, HWPPara *pPara)
pPara->pshape.reserved[0] = sal::static_int_cast<unsigned char>(pex & 0x01);
pPara->pshape.reserved[1] = sal::static_int_cast<unsigned char>(pex & 0x02);
}
-
- return (!hwpf.State());
}
HWPPara::HWPPara()
@@ -77,18 +75,16 @@ HWPPara::HWPPara()
, etcflag(0)
, ctrlflag(0)
, pstyno(0)
- , pno(0)
+ , cshape(new CharShape)
, linfo(nullptr)
- , cshapep(nullptr)
{
- memset(&cshape, 0, sizeof(cshape));
+ memset(cshape.get(), 0, sizeof(CharShape));
memset(&pshape, 0, sizeof(pshape));
}
HWPPara::~HWPPara()
{
delete[] linfo;
- delete[] cshapep;
}
bool HWPPara::Read(HWPFile & hwpf, unsigned char flag)
@@ -105,18 +101,17 @@ bool HWPPara::Read(HWPFile & hwpf, unsigned char flag)
hwpf.Read4b(&ctrlflag, 1);
hwpf.Read1b(&pstyno, 1);
-
/* Paragraph representative character */
- cshape.Read(hwpf);
+ cshape->Read(hwpf);
if (nch > 0)
- hwpf.AddCharShape(&cshape);
+ hwpf.AddCharShape(cshape);
/* Paragraph paragraphs shape */
if (nch && !reuse_shape)
{
pshape.Read(hwpf);
- pshape.cshape = &cshape;
- pshape.pagebreak = etcflag;
+ pshape.cshape = cshape.get();
+ pshape.pagebreak = etcflag;
}
linfo = ::comphelper::newArray_null<LineInfo>(nline);
@@ -147,23 +142,19 @@ bool HWPPara::Read(HWPFile & hwpf, unsigned char flag)
if (contain_cshape)
{
- cshapep = ::comphelper::newArray_null<CharShape>(nch);
- if (!cshapep)
- {
- perror("Memory Allocation: cshape\n");
- return false;
- }
- memset(cshapep, 0, nch * sizeof(CharShape));
+ cshapep.resize(nch);
for (ii = 0; ii < nch; ii++)
{
+ cshapep[ii].reset(new CharShape);
+ memset(cshapep[ii].get(), 0, sizeof(CharShape));
hwpf.Read1b(&same_cshape, 1);
if (!same_cshape)
{
- cshapep[ii].Read(hwpf);
+ cshapep[ii]->Read(hwpf);
if (nch > 1)
- hwpf.AddCharShape(&cshapep[ii]);
+ hwpf.AddCharShape(cshapep[ii]);
}
else if (ii == 0)
cshapep[ii] = cshape;
@@ -188,15 +179,13 @@ bool HWPPara::Read(HWPFile & hwpf, unsigned char flag)
return nch && !hwpf.State();
}
-
CharShape *HWPPara::GetCharShape(int pos)
{
if (contain_cshape == 0)
- return &cshape;
- return cshapep + pos;
+ return cshape.get();
+ return cshapep[pos].get();
}
-
std::unique_ptr<HBox> HWPPara::readHBox(HWPFile & hwpf)
{
std::unique_ptr<HBox> hbox;
diff --git a/hwpfilter/source/hpara.h b/hwpfilter/source/hpara.h
index 62f19982efc6..126a9ece1482 100644
--- a/hwpfilter/source/hpara.h
+++ b/hwpfilter/source/hpara.h
@@ -69,7 +69,7 @@ struct LineInfo
hunit height_sp;
unsigned short softbreak; // column, page, section
- bool Read(HWPFile &hwpf, HWPPara *para);
+ void Read(HWPFile &hwpf, HWPPara *para);
};
/**
* It represents the paragraph.
@@ -103,12 +103,11 @@ class DLLEXPORT HWPPara
*/
unsigned long ctrlflag;
unsigned char pstyno;
- CharShape cshape; /* When characters are all the same shape */
+ std::shared_ptr<CharShape> cshape; /* When characters are all the same shape */
ParaShape pshape; /* if reuse flag is 0, */
- int pno; /* then run-time only */
LineInfo *linfo;
- CharShape *cshapep;
+ std::vector<std::shared_ptr<CharShape>> cshapep;
/**
* Box object list
*/
diff --git a/hwpfilter/source/hstyle.cxx b/hwpfilter/source/hstyle.cxx
index 6a67d23818de..6c29a500d2b5 100644
--- a/hwpfilter/source/hstyle.cxx
+++ b/hwpfilter/source/hstyle.cxx
@@ -113,7 +113,7 @@ void HWPStyle::SetParaShape(int n, ParaShape * pshapep)
}
-bool HWPStyle::Read(HWPFile & hwpf)
+void HWPStyle::Read(HWPFile & hwpf)
{
CharShape cshape;
ParaShape pshape;
@@ -121,7 +121,7 @@ bool HWPStyle::Read(HWPFile & hwpf)
hwpf.Read2b(&nstyles, 1);
style = ::comphelper::newArray_null<StyleData>(nstyles);
if (!style)
- return false;
+ return;
for (int ii = 0; ii < nstyles; ii++)
{
@@ -133,9 +133,8 @@ bool HWPStyle::Read(HWPFile & hwpf)
SetCharShape(ii, &cshape);
SetParaShape(ii, &pshape);
if (hwpf.State())
- return false;
+ return;
}
- return true;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/hstyle.h b/hwpfilter/source/hstyle.h
index 443102983535..c13523061a74 100644
--- a/hwpfilter/source/hstyle.h
+++ b/hwpfilter/source/hstyle.h
@@ -45,7 +45,7 @@ class DLLEXPORT HWPStyle
void SetCharShape( int n, CharShape *cshapep );
void SetParaShape( int n, ParaShape *pshapep );
- bool Read( HWPFile &hwpf );
+ void Read( HWPFile &hwpf );
};
#endif
/* _HWPSTYLE+H_ */
diff --git a/hwpfilter/source/htags.cxx b/hwpfilter/source/htags.cxx
index eb49626baa34..044f7d2a6a61 100644
--- a/hwpfilter/source/htags.cxx
+++ b/hwpfilter/source/htags.cxx
@@ -25,7 +25,7 @@
#include "hwpfile.h"
#include "htags.h"
-bool HyperText::Read(HWPFile & hwpf)
+void HyperText::Read(HWPFile & hwpf)
{
hwpf.Read1b(filename, 256);
hwpf.Read2b(bookmark, 16);
@@ -41,7 +41,6 @@ bool HyperText::Read(HWPFile & hwpf)
break;
}
}
- return true;
}
@@ -53,7 +52,7 @@ EmPicture::EmPicture(size_t tsize)
else
data = new uchar[size];
}
-#ifdef WIN32
+#ifdef _WIN32
#define unlink _unlink
#endif
EmPicture::~EmPicture()
@@ -84,14 +83,14 @@ OlePicture::OlePicture(int tsize)
size = tsize - 4;
if (size <= 0)
return;
-#ifndef WIN32
+#ifndef _WIN32
pis = new char[size];
#endif
};
OlePicture::~OlePicture()
{
-#ifdef WIN32
+#ifdef _WIN32
if( pis )
pis->Release();
#else
@@ -101,21 +100,21 @@ OlePicture::~OlePicture()
#define FILESTG_SIGNATURE_NORMAL 0xF8995568
-bool OlePicture::Read(HWPFile & hwpf)
+void OlePicture::Read(HWPFile & hwpf)
{
if (size <= 0)
- return false;
+ return;
// We process only FILESTG_SIGNATURE_NORMAL.
hwpf.Read4b(&signature, 1);
if (signature != FILESTG_SIGNATURE_NORMAL)
- return false;
-#ifdef WIN32
+ return;
+#ifdef _WIN32
char *data = new char[size];
if (hwpf.ReadBlock(data,size) == 0)
{
delete [] data;
- return false;
+ return;
}
FILE *fp;
char tname[200];
@@ -124,7 +123,7 @@ bool OlePicture::Read(HWPFile & hwpf)
if (0 == (fp = fopen(tname, "wb")))
{
delete [] data;
- return false;
+ return;
}
fwrite(data, size, 1, fp);
delete [] data;
@@ -135,15 +134,13 @@ bool OlePicture::Read(HWPFile & hwpf)
NULL, 0, &pis) != S_OK ) {
pis = 0;
unlink(tname);
- return false;
+ return;
}
unlink(tname);
#else
if (pis == nullptr || hwpf.ReadBlock(pis, size) == 0)
- return false;
+ return;
#endif
-
- return true;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/htags.h b/hwpfilter/source/htags.h
index 303b84461c9e..292897b47bcd 100644
--- a/hwpfilter/source/htags.h
+++ b/hwpfilter/source/htags.h
@@ -46,7 +46,7 @@ struct HyperText
char macro[325];
uchar type;
char reserve[3];
- bool Read(HWPFile& hwpf);
+ void Read(HWPFile& hwpf);
};
/**
* @short Win32 OLE object
@@ -55,7 +55,7 @@ struct OlePicture
{
int size;
uint signature;
-#ifdef WIN32
+#ifdef _WIN32
IStorage *pis;
#else
char *pis;
@@ -63,7 +63,7 @@ struct OlePicture
explicit OlePicture(int tsize);
~OlePicture(void);
- bool Read(HWPFile& hwpf);
+ void Read(HWPFile& hwpf);
};
#endif // INCLUDED_HWPFILTER_SOURCE_HTAGS_H
diff --git a/hwpfilter/source/hwpeq.cxx b/hwpfilter/source/hwpeq.cxx
index dfd2d0a88a8e..dd461103a1a6 100644
--- a/hwpfilter/source/hwpeq.cxx
+++ b/hwpfilter/source/hwpeq.cxx
@@ -33,7 +33,7 @@ using namespace std;
#include <sal/macros.h>
/* @Man: change the hwp formula to LaTeX */
-#ifdef WIN32
+#ifdef _WIN32
# define ENDL "\r\n"
#else /* !WIN32 */
# define ENDL "\n"
@@ -48,7 +48,7 @@ using namespace std;
#define IS_WS(ch) (strchr(WS, ch))
#define IS_BINARY(ch) (strchr("+-<=>", ch))
-#ifdef WIN32
+#ifdef _WIN32
#define STRICMP stricmp
#else
#define STRICMP strcasecmp
diff --git a/hwpfilter/source/hwpfile.cxx b/hwpfilter/source/hwpfile.cxx
index 22310ea75549..a6f97f9f1a87 100644
--- a/hwpfilter/source/hwpfile.cxx
+++ b/hwpfilter/source/hwpfile.cxx
@@ -176,15 +176,17 @@ int HWPFile::Read1b(void *ptr, size_t nmemb)
return hiodev ? hiodev->read1b(ptr, nmemb) : 0;
}
-int HWPFile::Read2b(void *ptr, size_t nmemb)
+void HWPFile::Read2b(void *ptr, size_t nmemb)
{
- return hiodev ? hiodev->read2b(ptr, nmemb) : 0;
+ if (hiodev)
+ hiodev->read2b(ptr, nmemb);
}
-int HWPFile::Read4b(void *ptr, size_t nmemb)
+void HWPFile::Read4b(void *ptr, size_t nmemb)
{
- return hiodev ? hiodev->read4b(ptr, nmemb) : 0;
+ if (hiodev)
+ hiodev->read4b(ptr, nmemb);
}
@@ -200,9 +202,10 @@ size_t HWPFile::SkipBlock(size_t size)
}
-bool HWPFile::SetCompressed(bool flag)
+void HWPFile::SetCompressed(bool flag)
{
- return hiodev && hiodev->setCompressed(flag);
+ if (hiodev)
+ hiodev->setCompressed(flag);
}
@@ -218,34 +221,34 @@ HIODev *HWPFile::SetIODevice(HIODev * new_hiodev)
// end of HIODev wrapper
-bool HWPFile::InfoRead()
+void HWPFile::InfoRead()
{
- return _hwpInfo.Read(*this);
+ _hwpInfo.Read(*this);
}
-bool HWPFile::FontRead()
+void HWPFile::FontRead()
{
- return _hwpFont.Read(*this);
+ _hwpFont.Read(*this);
}
-bool HWPFile::StyleRead()
+void HWPFile::StyleRead()
{
- return _hwpStyle.Read(*this);
+ _hwpStyle.Read(*this);
}
-bool HWPFile::ParaListRead()
+void HWPFile::ParaListRead()
{
- return ReadParaList(plist);
+ ReadParaList(plist);
}
bool HWPFile::ReadParaList(std::list < HWPPara* > &aplist, unsigned char flag)
{
std::unique_ptr<HWPPara> spNode( new HWPPara );
- unsigned char tmp_etcflag;
- unsigned char prev_etcflag = 0;
+ unsigned char tmp_etcflag;
+ unsigned char prev_etcflag = 0;
while (spNode->Read(*this, flag))
{
if( !(spNode->etcflag & 0x04) ){
@@ -380,7 +383,7 @@ ColumnDef *HWPFile::GetColumnDef(int num)
for(int i = 0; it != columnlist.end() ; ++it, i++){
if( i == num )
- break;
+ break;
}
if( it != columnlist.end() )
@@ -407,8 +410,8 @@ HyperText *HWPFile::GetHyperText()
std::list<HyperText*>::iterator it = hyperlist.begin();
for( int i = 0; it != hyperlist.end(); ++it, i++ ){
- if( i == currenthyper )
- break;
+ if( i == currenthyper )
+ break;
}
currenthyper++;
@@ -459,7 +462,7 @@ CharShape *HWPFile::getCharShape(int index)
{
if (index < 0 || static_cast<unsigned int>(index) >= cslist.size())
return nullptr;
- return cslist[index];
+ return cslist[index].get();
}
FBoxStyle *HWPFile::getFBoxStyle(int index)
@@ -502,22 +505,24 @@ void HWPFile::AddParaShape(ParaShape * pshape)
int nscount = 0;
for(int j = 0 ; j < MAXTABS-1 ; j++)
{
- if( j > 0 && pshape->tabs[j].position == 0 )
- break;
- if( pshape->tabs[0].position == 0 ){
- if( pshape->tabs[j].type || pshape->tabs[j].dot_continue ||
- (pshape->tabs[j].position != 1000 *j) )
- nscount = j;
- }
- else{
- if( pshape->tabs[j].type || pshape->tabs[j].dot_continue ||
- (pshape->tabs[j].position != 1000 * (j + 1)) )
- nscount = j;
+ if( j > 0 && pshape->tabs[j].position == 0 )
+ break;
+ if( pshape->tabs[0].position == 0 ){
+ if( pshape->tabs[j].type || pshape->tabs[j].dot_continue ||
+ (pshape->tabs[j].position != 1000 *j) )
+ nscount = j;
+ }
+ else {
+ if( pshape->tabs[j].type || pshape->tabs[j].dot_continue ||
+ (pshape->tabs[j].position != 1000 * (j + 1)) )
+ nscount = j;
}
}
if( nscount )
pshape->tabs[MAXTABS-1].type = sal::static_int_cast<char>(nscount);
- int value = compareParaShape(pshape);
+
+ int value = compareParaShape(pshape);
+
if( value == 0 || nscount )
{
pshape->index = ++pcount;
@@ -527,11 +532,10 @@ void HWPFile::AddParaShape(ParaShape * pshape)
pshape->index = value;
}
-
-void HWPFile::AddCharShape(CharShape * cshape)
+void HWPFile::AddCharShape(std::shared_ptr<CharShape>& cshape)
{
- int value = compareCharShape(cshape);
- if( value == 0 )
+ int value = compareCharShape(cshape.get());
+ if (value == 0)
{
cshape->index = ++ccount;
cslist.push_back(cshape);
diff --git a/hwpfilter/source/hwpfile.h b/hwpfilter/source/hwpfile.h
index d8b8918176b3..e1fb3236a84d 100644
--- a/hwpfilter/source/hwpfile.h
+++ b/hwpfilter/source/hwpfile.h
@@ -143,11 +143,11 @@ class DLLEXPORT HWPFile
/**
* Reads nmemb short type array from HIODev
*/
- int Read2b( void *ptr, size_t nmemb );
+ void Read2b( void *ptr, size_t nmemb );
/**
* Reads nmemb long type array from HIODev
*/
- int Read4b( void *ptr, size_t nmemb );
+ void Read4b( void *ptr, size_t nmemb );
/**
* Reads some bytes from HIODev not regarding endian's way
* @param size Amount for reading
@@ -164,7 +164,7 @@ class DLLEXPORT HWPFile
/**
* Sets if the stream is compressed
*/
- bool SetCompressed( bool );
+ void SetCompressed( bool );
/**
* Sets current HIODev
*/
@@ -177,19 +177,19 @@ class DLLEXPORT HWPFile
/**
* Reads document information of hwp file from HIODev
*/
- bool InfoRead(void);
+ void InfoRead(void);
/**
* Reads font list of hwp file from HIODev
*/
- bool FontRead(void);
+ void FontRead(void);
/**
* Reads style list of hwp file from HIODev
*/
- bool StyleRead(void);
+ void StyleRead(void);
/**
* Reads paragraph list of hwp file from HIODev
*/
- bool ParaListRead();
+ void ParaListRead();
/* 그림 등의 추가 정보를 읽는다. */
/**
* Reads additional information like embedded image of hwp file from HIODev
@@ -214,7 +214,7 @@ class DLLEXPORT HWPFile
void AddColumnInfo();
void SetColumnDef(ColumnDef *coldef);
void AddParaShape(ParaShape *);
- void AddCharShape(CharShape *);
+ void AddCharShape(std::shared_ptr<CharShape>&);
void AddFBoxStyle(FBoxStyle *);
void AddDateFormat(DateCode *);
void AddHeaderFooter(HeaderFooter *);
@@ -285,7 +285,7 @@ class DLLEXPORT HWPFile
std::list<HyperText*> hyperlist;
int currenthyper;
std::vector<ParaShape*> pslist; /* 스타오피스의 구조상 필요 */
- std::vector<CharShape*> cslist;
+ std::vector<std::shared_ptr<CharShape>> cslist;
std::vector<FBoxStyle*> fbslist;
std::vector<DateCode*> datecodes;
std::vector<HeaderFooter*> headerfooters;
diff --git a/hwpfilter/source/hwplib.h b/hwpfilter/source/hwplib.h
index 5d1324d537cb..39d2f1b171f3 100644
--- a/hwpfilter/source/hwplib.h
+++ b/hwpfilter/source/hwplib.h
@@ -27,24 +27,16 @@
/**
* size of hunit is 4 since hwp96 version
*/
-#ifndef _HCHAR_
-#define _HCHAR_
typedef unsigned short hchar;
typedef int hunit;
typedef unsigned char kchar;
-#endif // _HCHAR_
-#ifndef _UTYPE_
-#define _UTYPE_
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
-#endif /* _UTYPE_ */
typedef ::std::basic_string<hchar> hchar_string;
-#ifndef _ZZRECT_
-#define _ZZRECT_
/**
* @short Point
*/
@@ -86,7 +78,6 @@ typedef struct
int mulX, divX;
int mulY, divY;
} ZZScaleXY;
-#endif /* _ZZRECT_ */
#if !defined(_WIN32) && !defined(MAX_PATH)
# define MAX_PATH 260
diff --git a/hwpfilter/source/hwpreader.cxx b/hwpfilter/source/hwpreader.cxx
index e3197d1c176f..8a8c46049020 100644
--- a/hwpfilter/source/hwpreader.cxx
+++ b/hwpfilter/source/hwpreader.cxx
@@ -157,10 +157,10 @@ sal_Bool HwpReader::filter(const Sequence< PropertyValue >& rDescriptor) throw(R
nTotal += nRead;
}
- if( nTotal == 0 ) return sal_False;
+ if( nTotal == 0 ) return false;
if (hwpfile.ReadHwpFile(stream.release()))
- return sal_False;
+ return false;
if (m_rxDocumentHandler.is())
m_rxDocumentHandler->startDocument();
@@ -198,7 +198,7 @@ sal_Bool HwpReader::filter(const Sequence< PropertyValue >& rDescriptor) throw(R
if (m_rxDocumentHandler.is())
m_rxDocumentHandler->endDocument();
- return sal_True;
+ return true;
}
@@ -1718,7 +1718,7 @@ void HwpReader::makePageStyle()
if( hwpinfo.back_info.type == 1 ){
#ifdef _WIN32
padd("xlink:href", sXML_CDATA,
- hconv(kstr2hstr((uchar*) urltowin(hwpinfo.back_info.filename).c_str()).c_str()));
+ reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr((uchar*) urltowin(hwpinfo.back_info.filename).c_str()).c_str())));
#else
padd("xlink:href", sXML_CDATA,
reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr( reinterpret_cast<uchar const *>(urltounix(hwpinfo.back_info.filename).c_str())).c_str())));
@@ -2736,7 +2736,7 @@ void HwpReader::make_text_p0(HWPPara * para, bool bParaStart)
d->bInHeader = false;
}
padd("text:style-name", sXML_CDATA,
- ascii(getTStyleName(para->cshape.index, buf)));
+ ascii(getTStyleName(para->cshape->index, buf)));
rstartEl("text:span", mxList.get());
mxList->clear();
@@ -2780,8 +2780,8 @@ void HwpReader::make_text_p1(HWPPara * para,bool bParaStart)
hchar_string str;
int n;
int res;
- hchar dest[3];
- int curr = para->cshape.index;
+ hchar dest[3];
+ int curr = para->cshape->index;
unsigned char firstspace = 0;
if( !bParaStart )
@@ -3829,9 +3829,9 @@ void HwpReader::makePicture(Picture * hbox)
padd("xlink:type", sXML_CDATA, "simple");
#ifdef _WIN32
if( hbox->follow[4] != 0 )
- padd("xlink:href", sXML_CDATA, (hconv(kstr2hstr(hbox->follow + 4).c_str())));
+ padd("xlink:href", sXML_CDATA, reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr(hbox->follow + 4).c_str())));
else
- padd("xlink:href", sXML_CDATA, (hconv(kstr2hstr(hbox->follow + 5).c_str())));
+ padd("xlink:href", sXML_CDATA, reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr(hbox->follow + 5).c_str())));
#else
if( hbox->follow[4] != 0 )
padd("xlink:href", sXML_CDATA,
@@ -3890,7 +3890,7 @@ void HwpReader::makePicture(Picture * hbox)
if ( hbox->pictype == PICTYPE_FILE ){
#ifdef _WIN32
sprintf(buf, "file:///%s", hbox->picinfo.picun.path );
- padd("xlink:href", sXML_CDATA, (hconv(kstr2hstr((uchar *) buf).c_str())));
+ padd("xlink:href", sXML_CDATA, reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr((uchar *) buf).c_str())));
#else
padd("xlink:href", sXML_CDATA,
reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr(reinterpret_cast<uchar const *>(urltounix(hbox->picinfo.picun.path).c_str())).c_str())));
@@ -3919,7 +3919,7 @@ void HwpReader::makePicture(Picture * hbox)
}
else{
if( hwpfile.oledata ){
-#ifdef WIN32
+#ifdef _WIN32
LPSTORAGE srcsto;
LPUNKNOWN pObj;
wchar_t pathname[200];
@@ -4780,9 +4780,9 @@ void HwpReader::makeOutline(Outline * hbox)
}
-void HwpReader::parsePara(HWPPara * para, bool bParaStart)
+void HwpReader::parsePara(HWPPara * para)
{
-
+ bool bParaStart = false;
while (para)
{
if( para->nch == 1)
diff --git a/hwpfilter/source/hwpreader.hxx b/hwpfilter/source/hwpreader.hxx
index 2f47aa289179..9a4b813ed70b 100644
--- a/hwpfilter/source/hwpreader.hxx
+++ b/hwpfilter/source/hwpreader.hxx
@@ -106,7 +106,7 @@ private:
void makeTextDecls();
/* -------- Paragraph Parsing --------- */
- void parsePara(HWPPara *para, bool bParaStart = false);
+ void parsePara(HWPPara *para);
void make_text_p0(HWPPara *para, bool bParaStart = false);
void make_text_p1(HWPPara *para, bool bParaStart = false);
void make_text_p3(HWPPara *para, bool bParaStart = false);
@@ -152,7 +152,7 @@ private:
class HwpImportFilter : public WeakImplHelper< XFilter, XImporter, XServiceInfo, XExtendedFilterDetection >
{
public:
- HwpImportFilter(const Reference< XMultiServiceFactory >& rFact);
+ explicit HwpImportFilter(const Reference< XMultiServiceFactory >& rFact);
virtual ~HwpImportFilter();
public:
diff --git a/hwpfilter/source/lexer.cxx b/hwpfilter/source/lexer.cxx
index e7518ab38c25..76ba0ba770e8 100644
--- a/hwpfilter/source/lexer.cxx
+++ b/hwpfilter/source/lexer.cxx
@@ -48,7 +48,7 @@
#ifdef __cplusplus
#include <stdlib.h>
-#ifndef WIN32
+#ifndef _WIN32
#include <unistd.h>
#else
#include <io.h>
@@ -960,7 +960,7 @@ char *yytext;
#include <string.h>
#include "nodes.h"
-#ifdef WIN32
+#ifdef _WIN32
#define strdup _strdup
#define fileno _fileno
#define isatty _isatty
@@ -975,7 +975,7 @@ int yywrap();
}
#endif
-#ifdef WIN32
+#ifdef _WIN32
extern YYSTYPE yylval;
#endif
#ifdef TOKEN_DEBUG
@@ -1023,7 +1023,7 @@ static int yy_top_state YY_PROTO(( void ));
#ifdef YY_MALLOC_DECL
YY_MALLOC_DECL
#else
-#if __STDC__
+#if defined __STDC__ && __STDC__
#ifndef __cplusplus
#include <stdlib.h>
#endif
@@ -1288,6 +1288,7 @@ case 23:
YY_RULE_SETUP
{ token_debug(" ==>Ignore[\\rm]\n"); }
//YY_BREAK
+SAL_FALLTHROUGH;
case 24:
YY_RULE_SETUP
{ yylval.str = yytext+1; token_debug(" ==>General_Iden[%s]\n",yytext+1); return GENERAL_IDEN; }
@@ -1332,10 +1333,12 @@ case 34:
YY_RULE_SETUP
{ yylval.str = yytext+1; token_debug(" ==>Space_Symbol[%s]\n",yytext+1); /*return SPACE_SYMBOL;*/ }
//YY_BREAK
+SAL_FALLTHROUGH;
case 35:
YY_RULE_SETUP
{ yylval.str = strdup("quad"); token_debug(" ==>Space_Symbol[quad]\n"); /* return SPACE_SYMBOL;*/ }
//YY_BREAK
+SAL_FALLTHROUGH;
case 36:
YY_RULE_SETUP
{ yylval.dval = yytext; token_debug(" ==>Digit[%s]\n",yytext); return DIGIT; }
diff --git a/hwpfilter/source/list.hxx b/hwpfilter/source/list.hxx
deleted file mode 100644
index 9cdf7df8ed4f..000000000000
--- a/hwpfilter/source/list.hxx
+++ /dev/null
@@ -1,146 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * This file incorporates work covered by the following license notice:
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed
- * with this work for additional information regarding copyright
- * ownership. The ASF licenses this file to you under the Apache
- * License, Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of
- * the License at http://www.apache.org/licenses/LICENSE-2.0 .
- */
-
-#ifndef INCLUDED_HWPFILTER_SOURCE_LIST_HXX
-#define INCLUDED_HWPFILTER_SOURCE_LIST_HXX
-
-/**
- * Re-implement a simple container: LinkedList + LinkedListIterator
- *
- * DO NOT USE EXCEPT FOR REPLACING THE ORIGINAL LinkedList/LinkedListIterator!
- * USE STL CONTAINERS FOR NEW CODE!
- *
- * The classes LinkedList and LinkedListIterator were originally
- * implemented in two files LinkedList.cxx/.h, whose license would not
- * allow re-distribution through OpenOffice.org. This file
- * re-implements the same functionality, based on the STL.
- */
-
-#include <cstddef>
-#include <vector>
-
-template<class T>
-class LinkedList
-{
- typedef std::vector<T*> list_t;
- list_t maList;
-
-public:
- /// construct list with one element (pItem) or no element (pItem == NULL)
- explicit LinkedList( T* pItem = 0 );
- ~LinkedList();
-};
-
-/** iterator class for LinkedList<T>. Iterator may travel outside of
- * list using operator++/--, in which case current() must return
- * NULL. */
-template<class T>
-class LinkedListIterator
-{
- // iterator state: reference list + position
- LinkedList<T>* mpList;
- int mnPosition;
-
-public:
- /// construct list with single element
- explicit LinkedListIterator( LinkedList<T>* pList = 0 );
- ~LinkedListIterator();
-
- // bug-compatible with original LinkedList.h/cxx: Ignore parameter!
- void operator++( int ); /// advance iterator by one step (ignore n !!!)
- void operator--( int ); /// go one step backwards (ignore n !!!)
- void operator++(); /// advance iterator by one step
- void operator--(); /// go one step backwards
-
-};
-
-
-// IMPLEMENTATION
-
-// (the implementation of template classes must be accessible to using
-// code, hence this implementation is in the header.)
-
-
-#include <algorithm>
-
-// define assert based on SAL, so we do not introduce a tools dependency
-#include <osl/diagnose.h>
-#define ASSERT(x) OSL_ENSURE((x), " HWP FILTER: " #x)
-
-
-template<class T>
-LinkedList<T>::LinkedList( T* pItem )
-{
- if( pItem != nullptr )
- maList.push_back( pItem );
-}
-
-template<class T>
-LinkedList<T>::~LinkedList()
-{
-}
-
-template<class T>
-LinkedListIterator<T>::LinkedListIterator( LinkedList<T>* pList ) :
- mpList( pList ),
- mnPosition( 0 )
-{
- ASSERT( pList != nullptr );
-}
-
-template<class T>
-LinkedListIterator<T>::~LinkedListIterator()
-{
-}
-
-template<class T>
-void LinkedListIterator<T>::operator++( int )
-{
- ASSERT( mpList != nullptr );
-
- // bug-compatible with LinkedList.cxx: ignore parameter!
- mnPosition ++;
-}
-
-template<class T>
-void LinkedListIterator<T>::operator--( int )
-{
- ASSERT( mpList != nullptr );
-
- // bug-compatible with LinkedList.cxx: ignore parameter!
- mnPosition --;
-}
-
-template<class T>
-void LinkedListIterator<T>::operator++()
-{
- ASSERT( mpList != nullptr );
- mnPosition ++;
-}
-
-template<class T>
-void LinkedListIterator<T>::operator--()
-{
- ASSERT( mpList != nullptr );
- mnPosition --;
-}
-
-#endif
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/mapping.h b/hwpfilter/source/mapping.h
index e8d6c0d33a7c..6a6fb8a912f4 100644
--- a/hwpfilter/source/mapping.h
+++ b/hwpfilter/source/mapping.h
@@ -363,7 +363,7 @@ const struct FormulaEntry FormulaMapTab[] = {
#ifndef DEBUG
hchar_string getMathMLEntity(const char *tex)
{
- static const size_t tabSize = sizeof(FormulaMapTab) / sizeof(FormulaMapTab[0]);
+ static const size_t tabSize = SAL_N_ELEMENTS(FormulaMapTab);
hchar_string buf;
for (size_t i = 0 ; i < tabSize ; i++) {
diff --git a/hwpfilter/source/mzstring.cxx b/hwpfilter/source/mzstring.cxx
index 9f5563a2d70a..b99b9d0044ee 100644
--- a/hwpfilter/source/mzstring.cxx
+++ b/hwpfilter/source/mzstring.cxx
@@ -23,22 +23,14 @@
#include "mzstring.h"
-#ifndef WIN32
-#else
-
- #if defined _MSC_VER
- #pragma warning(push, 1)
- #endif
+#ifdef _WIN32
# include <windows.h>
- #if defined _MSC_VER
- #pragma warning(pop)
- #endif
-#endif /* WIN32 */
+#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifndef WIN32
+#ifndef _WIN32
# define wsprintf sprintf
#endif
@@ -65,7 +57,7 @@ MzString::~MzString()
}
-MzString &MzString::operator = (MzString &s)
+MzString &MzString::operator=(const MzString &s)
{
int n = s.length();
if (allocate(n))
@@ -167,29 +159,6 @@ int MzString::rfind(char ch, int pos)
}
-// += operator
-
-MzString &MzString::operator += (char ch)
-{
- append(&ch, 1);
- return *this;
-}
-
-
-MzString &MzString::operator += (const char *str)
-{
- append(str);
- return *this;
-}
-
-
-MzString &MzString::operator += (MzString const &s)
-{
- append(s);
- return *this;
-}
-
-
// << operator
MzString &MzString::operator << (const char *str)
{
diff --git a/hwpfilter/source/mzstring.h b/hwpfilter/source/mzstring.h
index fe84694f1f40..4afb0e54fb8a 100644
--- a/hwpfilter/source/mzstring.h
+++ b/hwpfilter/source/mzstring.h
@@ -82,7 +82,6 @@ class MzString
{
public:
MzString(); // Create an empty string
-// if len = 0, len becomes s.length)
~MzString();
int length() const;
@@ -94,13 +93,10 @@ class MzString
bool resize(int len);
// Assignment
- MzString &operator = (MzString &s);
+ MzString &operator = (const MzString &s);
MzString &operator = (const char *s);
// Appending
- MzString &operator += (char);
- MzString &operator += (const char *);
- MzString &operator += (MzString const &);
MzString &operator << (const char *);
MzString &operator << (char);
diff --git a/hwpfilter/source/nodes.h b/hwpfilter/source/nodes.h
index 7d93f104b0ca..539452244d5a 100644
--- a/hwpfilter/source/nodes.h
+++ b/hwpfilter/source/nodes.h
@@ -22,7 +22,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "list.hxx"
+#include <osl/diagnose.h>
enum IDLIST {
ID_MATHML,
commit 3cdac6bb2defce45342dff04400c7a37bb8a2453
Author: Eike Rathke <erack at redhat.com>
Date: Thu Apr 20 22:06:23 2017 +0200
add ICU changeset-39671 fix for CVE-2017-7867 CVE-2017-7868
http://bugs.icu-project.org/trac/changeset/39671
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=213
https://bugzilla.redhat.com/show_bug.cgi?id=1444101
Reviewed-on: https://gerrit.libreoffice.org/36754
Reviewed-by: Eike Rathke <erack at redhat.com>
Tested-by: Jenkins <ci at libreoffice.org>
(cherry picked from commit c7de8233d15ed0c90fef6c49a54d60cf10119f58)
Backported to older MSVC using the UGLY_SIZEOF_MAPTOUCHARS macro instead
of sizeof(UTF8Buf::mapToUChars).
Change-Id: I4e776ad4fe63c77057b0c823f8672a2b6703346f
Reviewed-on: https://gerrit.libreoffice.org/36776
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Michael Stahl <mstahl at redhat.com>
(cherry picked from commit 91f5d002884cae1a60768e9caa9d182f41fb7be6)
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index 4a6a11477af3..86369ef8e43b 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -28,6 +28,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \
$(if $(filter EMSCRIPTEN,$(OS)),external/icu/icu4c-emscripten.patch.1) \
external/icu/khmerbreakengine.patch \
+ external/icu/icu4c-changeset-39671.patch.1 \
))
$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
diff --git a/external/icu/icu4c-changeset-39671.patch.1 b/external/icu/icu4c-changeset-39671.patch.1
new file mode 100644
index 000000000000..b8ac1385364e
--- /dev/null
+++ b/external/icu/icu4c-changeset-39671.patch.1
@@ -0,0 +1,189 @@
+diff -ur icu.org/source/common/utext.cpp icu/source/common/utext.cpp
+--- icu.org/source/common/utext.cpp 2016-06-15 20:58:17.000000000 +0200
++++ icu/source/common/utext.cpp 2017-04-21 16:38:15.993398034 +0200
+@@ -847,9 +847,15 @@
+ //------------------------------------------------------------------------------
+
+ // Chunk size.
+-// Must be less than 85, because of byte mapping from UChar indexes to native indexes.
+-// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes
+-// to two UChars.)
++// Must be less than 42 (256/6), because of byte mapping from UChar indexes to native indexes.
++// Worst case there are six UTF-8 bytes per UChar.
++// obsolete 6 byte form fd + 5 trails maps to fffd
++// obsolete 5 byte form fc + 4 trails maps to fffd
++// non-shortest 4 byte forms maps to fffd
++// normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
++// mapToUChars array size must allow for the worst case, 6.
++// This could be brought down to 4, by treating fd and fc as pure illegal,
++// rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
+ //
+ enum { UTF8_TEXT_CHUNK_SIZE=32 };
+
+@@ -867,6 +873,15 @@
+ // pair. Doing this is simpler than checking for the edge case.
+ //
+
++// erAck: older MSVC used on libreoffice-5-3 and 5-2 bails out with
++// error C2070: 'unknown': illegal sizeof operand
++// for sizeof(UTF8Buf::mapToUChars)
++// so have an ugly workaround:
++// First define a macro of the original size expression, so a follow-up patch
++// on the original code would fail..
++#define UGLY_MAPTOUCHARS_SIZE (UTF8_TEXT_CHUNK_SIZE*6+6)
++#define UGLY_SIZEOF_MAPTOUCHARS (sizeof(uint8_t)*(UGLY_MAPTOUCHARS_SIZE))
++
+ struct UTF8Buf {
+ int32_t bufNativeStart; // Native index of first char in UChar buf
+ int32_t bufNativeLimit; // Native index following last char in buf.
+@@ -889,7 +904,7 @@
+ // Requires two extra slots,
+ // one for a supplementary starting in the last normal position,
+ // and one for an entry for the buffer limit position.
+- uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
++ uint8_t mapToUChars[UGLY_MAPTOUCHARS_SIZE]; // Map native offset from bufNativeStart to
+ // correspoding offset in filled part of buf.
+ int32_t align;
+ };
+@@ -1032,6 +1047,7 @@
+ // Requested index is in this buffer.
+ u8b = (UTF8Buf *)ut->p; // the current buffer
+ mapIndex = ix - u8b->toUCharsMapStart;
++ U_ASSERT(mapIndex < (int32_t)UGLY_SIZEOF_MAPTOUCHARS);
+ ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
+ return TRUE;
+
+@@ -1298,6 +1314,10 @@
+ // Can only do this if the incoming index is somewhere in the interior of the string.
+ // If index is at the end, there is no character there to look at.
+ if (ix != ut->b) {
++ // Note: this function will only move the index back if it is on a trail byte
++ // and there is a preceding lead byte and the sequence from the lead
++ // through this trail could be part of a valid UTF-8 sequence
++ // Otherwise the index remains unchanged.
+ U8_SET_CP_START(s8, 0, ix);
+ }
+
+@@ -1311,7 +1331,10 @@
+ UChar *buf = u8b->buf;
+ uint8_t *mapToNative = u8b->mapToNative;
+ uint8_t *mapToUChars = u8b->mapToUChars;
+- int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
++ int32_t toUCharsMapStart = ix - UGLY_SIZEOF_MAPTOUCHARS + 1;
++ // Note that toUCharsMapStart can be negative. Happens when the remaining
++ // text from current position to the beginning is less than the buffer size.
++ // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
+ int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region
+ // at end of buffer to leave room
+ // for a surrogate pair at the
+@@ -1338,6 +1361,7 @@
+ if (c<0x80) {
+ // Special case ASCII range for speed.
+ buf[destIx] = (UChar)c;
++ U_ASSERT(toUCharsMapStart <= srcIx);
+ mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
+ } else {
+@@ -1367,6 +1391,7 @@
+ do {
+ mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
+ } while (sIx >= srcIx);
++ U_ASSERT(toUCharsMapStart <= (srcIx+1));
+
+ // Set native indexing limit to be the current position.
+ // We are processing a non-ascii, non-native-indexing char now;
+@@ -1541,6 +1566,7 @@
+ U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
+ U_ASSERT(index<=ut->chunkNativeLimit);
+ int32_t mapIndex = index - u8b->toUCharsMapStart;
++ U_ASSERT(mapIndex < (int32_t)UGLY_SIZEOF_MAPTOUCHARS);
+ int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
+ U_ASSERT(offset>=0 && offset<=ut->chunkLength);
+ return offset;
+diff -ur icu.org/source/test/intltest/utxttest.cpp icu/source/test/intltest/utxttest.cpp
+--- icu.org/source/test/intltest/utxttest.cpp 2016-06-15 20:58:17.000000000 +0200
++++ icu/source/test/intltest/utxttest.cpp 2017-04-21 16:14:57.383814739 +0200
+@@ -67,6 +67,8 @@
+ if (exec) Ticket10983(); break;
+ case 7: name = "Ticket12130";
+ if (exec) Ticket12130(); break;
++ case 8: name = "Ticket12888";
++ if (exec) Ticket12888(); break;
+ default: name = ""; break;
+ }
+ }
+@@ -1583,3 +1585,63 @@
+ }
+ utext_close(&ut);
+ }
++
++// Ticket 12888: bad handling of illegal utf-8 containing many instances of the archaic, now illegal,
++// six byte utf-8 forms. Original implementation had an assumption that
++// there would be at most three utf-8 bytes per UTF-16 code unit.
++// The five and six byte sequences map to a single replacement character.
++
++void UTextTest::Ticket12888() {
++ const char *badString =
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80";
++
++ UErrorCode status = U_ZERO_ERROR;
++ LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status));
++ TEST_SUCCESS(status);
++ for (;;) {
++ UChar32 c = utext_next32(ut.getAlias());
++ if (c == U_SENTINEL) {
++ break;
++ }
++ }
++ int32_t endIdx = utext_getNativeIndex(ut.getAlias());
++ if (endIdx != (int32_t)strlen(badString)) {
++ errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, strlen(badString), endIdx);
++ return;
++ }
++
++ for (int32_t prevIndex = endIdx; prevIndex>0;) {
++ UChar32 c = utext_previous32(ut.getAlias());
++ int32_t currentIndex = utext_getNativeIndex(ut.getAlias());
++ if (c != 0xfffd) {
++ errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n",
++ __FILE__, __LINE__, 0xfffd, c, currentIndex);
++ break;
++ }
++ if (currentIndex != prevIndex - 6) {
++ errln("%s:%d: wrong index. Expected, actual = %d, %d",
++ __FILE__, __LINE__, prevIndex - 6, currentIndex);
++ break;
++ }
++ prevIndex = currentIndex;
++ }
++}
+diff -ur icu.org/source/test/intltest/utxttest.h icu/source/test/intltest/utxttest.h
+--- icu.org/source/test/intltest/utxttest.h 2016-06-15 20:58:17.000000000 +0200
++++ icu/source/test/intltest/utxttest.h 2017-04-21 16:14:57.383814739 +0200
+@@ -38,6 +38,7 @@
+ void Ticket10562();
+ void Ticket10983();
+ void Ticket12130();
++ void Ticket12888();
+
+ private:
+ struct m { // Map between native indices & code points.
commit 0f849dd2b9d789bc01890b9e22772dac2b5d74b3
Author: Martin Hosken <martin_hosken at sil.org>
Date: Fri Apr 15 20:26:08 2016 +0200
reactivate ICU Khmer patch
Patch has been upstreamed with
https://ssl.icu-project.org/trac/ticket/12504
Change-Id: I1f3ddad87a2a6568ced3f9d2b2df3e0af0ee18aa
Reviewed-on: https://gerrit.libreoffice.org/24117
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Martin Hosken <martin_hosken at sil.org>
Reviewed-by: Eike Rathke <erack at redhat.com>
Tested-by: Eike Rathke <erack at redhat.com>
(cherry picked from commit 4e066825d43400969041669c82d8a4e0bfd91adf)
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index c48d02556d2d..4a6a11477af3 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -27,6 +27,9 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
external/icu/clang-cl.patch.0 \
$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \
$(if $(filter EMSCRIPTEN,$(OS)),external/icu/icu4c-emscripten.patch.1) \
+ external/icu/khmerbreakengine.patch \
))
+$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
+
# vim: set noet sw=4 ts=4:
diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
index 0687645e8790..8f81f315da3e 100644
--- a/external/icu/khmerbreakengine.patch
+++ b/external/icu/khmerbreakengine.patch
@@ -478,9 +478,9 @@ index f1c874d..3ad1b3f 100644
- UChar32 uc;
- int32_t chars = 0;
- for (;;) {
-- int32_t pcIndex = utext_getNativeIndex(text);
+- int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
- pc = utext_next32(text);
-- int32_t pcSize = utext_getNativeIndex(text) - pcIndex;
+- int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
- chars += pcSize;
- remaining -= pcSize;
- if (remaining <= 0) {
@@ -1000,10 +1000,10 @@ index cb594c6..82f2e77 100644
+ int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {
UCharsTrie uct(characters);
- int32_t startingTextIndex = utext_getNativeIndex(text);
+ int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
@@ -53,7 +53,13 @@ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
- int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
+ int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
codePointsMatched += 1;
+ if (ignoreSet != NULL && ignoreSet->contains(c)) {
+ continue;
@@ -1022,11 +1022,11 @@ index cb594c6..82f2e77 100644
- int32_t *prefix) const {
+ int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {
BytesTrie bt(characters);
- int32_t startingTextIndex = utext_getNativeIndex(text);
+ int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
int32_t wordCount = 0;
@@ -120,7 +126,13 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
- int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
+ int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
codePointsMatched += 1;
+ if (ignoreSet != NULL && ignoreSet->contains(c)) {
+ continue;
@@ -1081,7 +1081,7 @@ diff --git a/source/data/Makefile.in b/source/data/Makefile.in
index 816c82d..c637d70 100644
--- misc/icu/source/data/Makefile.in
+++ build/icu/source/data/Makefile.in
-@@ -179,7 +179,7 @@ endif
+@@ -181,7 +181,7 @@ endif
endif
endif
@@ -1090,17 +1090,17 @@ index 816c82d..c637d70 100644
ifneq ($(ENABLE_STATIC),)
ifeq ($(PKGDATA_MODE),dll)
$(PKGDATA_INVOKE) $(PKGDATA) -e $(ICUDATA_ENTRY_POINT) -T $(OUTTMPDIR) -p $(ICUDATA_NAME) $(PKGDATA_LIBSTATICNAME) -m static $(PKGDATA_VERSIONING) $(PKGDATA_LIST)
-@@ -563,8 +563,14 @@ $(BRKBLDDIR)/burmesedict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES)
- $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1000 -c -i $(BUILDDIR) $(BRKSRCDIR)/burmesedict.txt $(BRKBLDDIR)/burmesedict.dict
+@@ -564,8 +564,14 @@ $(BRKBLDDIR)/burmesedict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES)
+ $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1000 -c -i $(BUILDDIR) $(DICTSRCDIR)/burmesedict.txt $(BRKBLDDIR)/burmesedict.dict
# TODO: figure out why combining characters are here?
-$(BRKBLDDIR)/khmerdict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES)
-- $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
+- $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(DICTSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
+#$(BRKBLDDIR)/khmerdict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES)
-+# $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
++# $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(DICTSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
+
+#$(MAINBUILDDIR)/khmerdict.stamp: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(BRKSRCDIR)/khmerdict.txt build-local
-+# $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
++# $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(DICTSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
+$(MAINBUILDDIR)/khmerdict.stamp: $(BRKSRCDIR)/khmerdict.dict build-local
+ cp $< $(BRKBLDDIR)
+ echo "timestamp" > $@
commit 05d9fd7b3416ef24e94973c1bdef458636ccaf7f
Author: Eike Rathke <erack at redhat.com>
Date: Wed Apr 13 22:24:25 2016 +0200
upgrade to ICU 57
This does not apply patches
external/icu/khmerbreakengine.patch
external/icu/khmerdict.dict
anymore, as the khmerbreakengine.patch failed to apply with several
hunks of which one was 16k. Asking the patch contributor to follow up on
this.
Change-Id: I78d4371d04a7b03417d402a222bcd384f02a619e
Reviewed-on: https://gerrit.libreoffice.org/24067
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Eike Rathke <erack at redhat.com>
Tested-by: Eike Rathke <erack at redhat.com>
(cherry picked from commit c58655c5a221d986fa3c3eed2f28810269205721)
diff --git a/configure.ac b/configure.ac
index 6b003c4fc545..2ebb0ec7194d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9023,7 +9023,7 @@ SYSTEM_GENBRK=
SYSTEM_GENCCODE=
SYSTEM_GENCMN=
-ICU_MAJOR=56
+ICU_MAJOR=57
ICU_MINOR=1
ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE"
ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE"
diff --git a/download.lst b/download.lst
index 53fc12297f5b..ebbba25d9080 100755
--- a/download.lst
+++ b/download.lst
@@ -63,7 +63,7 @@ export HARFBUZZ_TARBALL := harfbuzz-0.9.40.tar.bz2
export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip
export HUNSPELL_TARBALL := 4967da60b23413604c9e563beacc63b4-hunspell-1.3.3.tar.gz
export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz
-export ICU_TARBALL := c4a2d71ff56aec5ebfab2a3f059be99d-icu4c-56_1-src.tgz
+export ICU_TARBALL := 976734806026a4ef8bdd17937c8898b9-icu4c-57_1-src.tgz
export JFREEREPORT_FLOW_ENGINE_TARBALL := ba2930200c9f019c2d93a8c88c651a0f-flow-engine-0.9.4.zip
export JFREEREPORT_FLUTE_TARBALL := d8bd5eed178db6e2b18eeed243f85aa8-flute-1.1.6.zip
export JFREEREPORT_LIBBASE_TARBALL := eeb2c7ddf0d302fba4bfc6e97eac9624-libbase-1.1.6.zip
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index 4a6a11477af3..c48d02556d2d 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -27,9 +27,6 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
external/icu/clang-cl.patch.0 \
$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \
$(if $(filter EMSCRIPTEN,$(OS)),external/icu/icu4c-emscripten.patch.1) \
- external/icu/khmerbreakengine.patch \
))
-$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
-
# vim: set noet sw=4 ts=4:
commit aa4b3ec51803ade29323273668a516e7f18bdf95
Author: Martin Hosken <martin_hosken at sil.org>
Date: Tue Mar 22 11:26:52 2016 +0700
Fix wrong pattern definitions in khmer dictionary breaker
Change-Id: I0132196744046391759a6e5110d054feee3deea3
Reviewed-on: https://gerrit.libreoffice.org/23420
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Martin Hosken <martin_hosken at sil.org>
(cherry picked from commit 7f36f4ce9f9f3d430009ba472d275d038abecb16)
diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
index bc0d287929b0..0687645e8790 100644
--- a/external/icu/khmerbreakengine.patch
+++ b/external/icu/khmerbreakengine.patch
@@ -15,8 +15,8 @@ index f1c874d..3ad1b3f 100644
+ fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
+
+ // note Skip Sets contain fIgnoreSet characters too.
-+ fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]\\u200C\\u200D\\u2060"), status);
-+ fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]\\u200C\\u200D\\u2060"), status);
++ fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]\\u200C\\u200D\\u2060]"), status);
++ fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]\\u200C\\u200D\\u2060]"), status);
+ fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
}
@@ -332,10 +332,10 @@ index f1c874d..3ad1b3f 100644
+ startZwsp = scanBeforeStart(text, scanStart, breakStart);
+ }
+ utext_setNativeIndex(text, rangeStart);
-+ scanFwdClusters(text, rangeStart, initAfter);
++ scanFwdClusters(text, rangeEnd, initAfter);
+ bool endZwsp = scanAfterEnd(text, utext_nativeLength(text), scanEnd, breakEnd);
+ utext_setNativeIndex(text, rangeEnd - 1);
-+ scanBackClusters(text, rangeEnd, finalBefore);
++ scanBackClusters(text, rangeStart, finalBefore);
+ if (finalBefore < initAfter) { // the whole run is tented so no breaks
+ if (breakStart || fTypes < UBRK_LINE)
+ foundBreaks.push(rangeStart, status);
@@ -539,7 +539,7 @@ index f1c874d..3ad1b3f 100644
+ int32_t ln = lengths.elementAti(j);
+ utext_setNativeIndex(text, ln+ix);
+ int32_t c = utext_current32(text);
-+ while (fPuncSet.contains(c) || fIgnoreSet.contains(c)) {
++ while ((fPuncSet.contains(c) || fIgnoreSet.contains(c)) && ln + i < numCodePts) {
+ ++ln;
+ utext_next32(text);
+ c = utext_current32(text);
commit 55dece94611e1b2a8a1974d11c10050d8d74b5f7
Author: Martin Hosken <martin_hosken at sil.org>
Date: Thu Mar 17 09:57:35 2016 +0700
Fix bug in khmr linebreaking and update dictionary
Change-Id: I2b776925c2c95cb56ccd592d036823c26054e059
Reviewed-on: https://gerrit.libreoffice.org/23316
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Martin Hosken <martin_hosken at sil.org>
(cherry picked from commit a976a19ca82661d8b459b85f5514b0e4c9222d47)
diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
index ba3e392a27f3..bc0d287929b0 100644
--- a/external/icu/khmerbreakengine.patch
+++ b/external/icu/khmerbreakengine.patch
@@ -2,7 +2,7 @@ diff --git a/source/common/dictbe.cpp b/source/common/dictbe.cpp
index f1c874d..3ad1b3f 100644
--- misc/icu/source/common/dictbe.cpp
+++ build/icu/source/common/dictbe.cpp
-@@ -27,8 +27,16 @@ U_NAMESPACE_BEGIN
+@@ -27,8 +27,17 @@ U_NAMESPACE_BEGIN
******************************************************************
*/
@@ -14,13 +14,14 @@ index f1c874d..3ad1b3f 100644
fTypes = breakTypes;
+ fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
+
++ // note Skip Sets contain fIgnoreSet characters too.
+ fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]\\u200C\\u200D\\u2060"), status);
+ fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]\\u200C\\u200D\\u2060"), status);
+ fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
}
DictionaryBreakEngine::~DictionaryBreakEngine() {
-@@ -90,7 +98,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
+@@ -90,7 +99,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
utext_setNativeIndex(text, current);
}
@@ -29,7 +30,7 @@ index f1c874d..3ad1b3f 100644
return result;
}
-@@ -101,6 +109,163 @@ DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
+@@ -101,6 +110,169 @@ DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
fSet.compact();
}
@@ -87,6 +88,8 @@ index f1c874d..3ad1b3f 100644
+ }
+ for (int i = 0; i < clusterLimit; ++i) { // scan backwards clusterLimit clusters
+ while (start > textStart) {
++ while (fIgnoreSet.contains(c))
++ c = utext_previous32(text);
+ if (!fMarkSet.contains(c)) {
+ if (fBaseSet.contains(c)) {
+ c = utext_previous32(text);
@@ -125,6 +128,10 @@ index f1c874d..3ad1b3f 100644
+ ++end;
+ }
+ for (int i = 0; i < clusterLimit; ++i) { // scan forwards clusterLimit clusters
++ while (fIgnoreSet.contains(c)) {
++ utext_next32(text);
++ c = utext_current32(text);
++ }
+ if (fBaseSet.contains(c)) {
+ while (end < textEnd) {
+ utext_next32(text);
@@ -193,7 +200,7 @@ index f1c874d..3ad1b3f 100644
/*
******************************************************************
* PossibleWord
-@@ -128,35 +293,35 @@ private:
+@@ -128,35 +302,35 @@ private:
public:
PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {};
~PossibleWord() {};
@@ -238,242 +245,7 @@ index f1c874d..3ad1b3f 100644
// Dictionary leaves text after longest prefix, not longest word. Back up.
if (count <= 0) {
utext_setNativeIndex(text, start);
-@@ -261,16 +426,16 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- PossibleWord words[THAI_LOOKAHEAD];
--
-+
- utext_setNativeIndex(text, rangeStart);
--
-+
- while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
- cpWordLength = 0;
- cuWordLength = 0;
-
- // Look for candidate words at the current position
- int32_t candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
--
-+
- // If we found exactly one, use that
- if (candidates == 1) {
- cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
-@@ -291,12 +456,12 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
- words[wordsFound%THAI_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
--
-+
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
--
-+
- // See if any of the possible second words is followed by a third word
- do {
- // If we find a third word, stop right away
-@@ -315,13 +480,13 @@ foundBest:
- cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
--
-+
- // We come here after having either found a word or not. We look ahead to the
- // next word. If it's not a dictionary word, we will combine it with the word we
- // just found (if there is one), but only if the preceding word does not exceed
- // the threshold.
- // The text iterator should now be positioned at the end of the word we found.
--
-+
- UChar32 uc = 0;
- if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
- // if it is a dictionary word, do nothing. If it isn't, then if there is
-@@ -357,12 +522,12 @@ foundBest:
- }
- }
- }
--
-+
- // Bump the word count if there wasn't already one
- if (cuWordLength <= 0) {
- wordsFound += 1;
- }
--
-+
- // Update the length with the passed-over characters
- cuWordLength += chars;
- }
-@@ -371,14 +536,14 @@ foundBest:
- utext_setNativeIndex(text, current+cuWordLength);
- }
- }
--
-+
- // Never stop before a combining mark.
- int32_t currPos;
- while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
- }
--
-+
- // Look ahead for possible suffixes if a dictionary word does not follow.
- // We do this in code rather than using a rule so that the heuristic
- // resynch continues to function. For example, one of the suffix characters
-@@ -496,16 +661,16 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- PossibleWord words[LAO_LOOKAHEAD];
--
-+
- utext_setNativeIndex(text, rangeStart);
--
-+
- while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
- cuWordLength = 0;
- cpWordLength = 0;
-
- // Look for candidate words at the current position
- int32_t candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
--
-+
- // If we found exactly one, use that
- if (candidates == 1) {
- cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text);
-@@ -526,12 +691,12 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
- words[wordsFound%LAO_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
--
-+
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
--
-+
- // See if any of the possible second words is followed by a third word
- do {
- // If we find a third word, stop right away
-@@ -549,7 +714,7 @@ foundBest:
- cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
--
-+
- // We come here after having either found a word or not. We look ahead to the
- // next word. If it's not a dictionary word, we will combine it withe the word we
- // just found (if there is one), but only if the preceding word does not exceed
-@@ -587,12 +752,12 @@ foundBest:
- }
- }
- }
--
-+
- // Bump the word count if there wasn't already one
- if (cuWordLength <= 0) {
- wordsFound += 1;
- }
--
-+
- // Update the length with the passed-over characters
- cuWordLength += chars;
- }
-@@ -601,14 +766,14 @@ foundBest:
- utext_setNativeIndex(text, current + cuWordLength);
- }
- }
--
-+
- // Never stop before a combining mark.
- int32_t currPos;
- while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
- }
--
-+
- // Look ahead for possible suffixes if a dictionary word does not follow.
- // We do this in code rather than using a rule so that the heuristic
- // resynch continues to function. For example, one of the suffix characters
-@@ -689,16 +854,16 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- PossibleWord words[BURMESE_LOOKAHEAD];
--
-+
- utext_setNativeIndex(text, rangeStart);
--
-+
- while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
- cuWordLength = 0;
- cpWordLength = 0;
-
- // Look for candidate words at the current position
- int32_t candidates = words[wordsFound%BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
--
-+
- // If we found exactly one, use that
- if (candidates == 1) {
- cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text);
-@@ -719,12 +884,12 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
- words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
--
-+
- // If we're already at the end of the range, we're done
- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
- goto foundBest;
- }
--
-+
- // See if any of the possible second words is followed by a third word
- do {
- // If we find a third word, stop right away
-@@ -742,7 +907,7 @@ foundBest:
- cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength();
- wordsFound += 1;
- }
--
-+
- // We come here after having either found a word or not. We look ahead to the
- // next word. If it's not a dictionary word, we will combine it withe the word we
- // just found (if there is one), but only if the preceding word does not exceed
-@@ -780,12 +945,12 @@ foundBest:
- }
- }
- }
--
-+
- // Bump the word count if there wasn't already one
- if (cuWordLength <= 0) {
- wordsFound += 1;
- }
--
-+
- // Update the length with the passed-over characters
- cuWordLength += chars;
- }
-@@ -794,14 +959,14 @@ foundBest:
- utext_setNativeIndex(text, current + cuWordLength);
- }
- }
--
-+
- // Never stop before a combining mark.
- int32_t currPos;
- while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
- utext_next32(text);
- cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
- }
--
-+
- // Look ahead for possible suffixes if a dictionary word does not follow.
- // We do this in code rather than using a rule so that the heuristic
- // resynch continues to function. For example, one of the suffix characters
-@@ -828,51 +993,28 @@ foundBest:
+@@ -828,51 +1002,28 @@ foundBest:
* KhmerBreakEngine
*/
@@ -536,7 +308,7 @@ index f1c874d..3ad1b3f 100644
}
KhmerBreakEngine::~KhmerBreakEngine() {
-@@ -884,180 +1027,204 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
+@@ -884,180 +1036,204 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
@@ -560,10 +332,10 @@ index f1c874d..3ad1b3f 100644
+ startZwsp = scanBeforeStart(text, scanStart, breakStart);
+ }
+ utext_setNativeIndex(text, rangeStart);
-+ scanFwdClusters(text, rangeEnd, initAfter);
++ scanFwdClusters(text, rangeStart, initAfter);
+ bool endZwsp = scanAfterEnd(text, utext_nativeLength(text), scanEnd, breakEnd);
+ utext_setNativeIndex(text, rangeEnd - 1);
-+ scanBackClusters(text, rangeStart, finalBefore);
++ scanBackClusters(text, rangeEnd, finalBefore);
+ if (finalBefore < initAfter) { // the whole run is tented so no breaks
+ if (breakStart || fTypes < UBRK_LINE)
+ foundBreaks.push(rangeStart, status);
@@ -715,7 +487,7 @@ index f1c874d..3ad1b3f 100644
+ if (count == 0) {
+ utext_setNativeIndex(text, ix);
+ int32_t c = utext_current32(text);
-+ if (fPuncSet.contains(c) || c == ZWSP || c == WJ) {
++ if (fPuncSet.contains(c) || fIgnoreSet.contains(c) || c == ZWSP) {
+ values.setElementAt(0, count);
+ lengths.setElementAt(1, count++);
+ } else if (fBaseSet.contains(c)) {
@@ -767,7 +539,7 @@ index f1c874d..3ad1b3f 100644
+ int32_t ln = lengths.elementAti(j);
+ utext_setNativeIndex(text, ln+ix);
+ int32_t c = utext_current32(text);
-+ while (fPuncSet.contains(c)) {
++ while (fPuncSet.contains(c) || fIgnoreSet.contains(c)) {
+ ++ln;
+ utext_next32(text);
+ c = utext_current32(text);
@@ -887,71 +659,6 @@ index f1c874d..3ad1b3f 100644
}
#if !UCONFIG_NO_NORMALIZATION
-@@ -1121,7 +1288,7 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
- return (int32_t)1 << bitIndex;
- }
-
--
-+
- /*
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
-@@ -1129,7 +1296,7 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
- * @param foundBreaks Output of C array of int32_t break positions, or 0
- * @return The number of breaks found
- */
--int32_t
-+int32_t
- CjkBreakEngine::divideUpDictionaryRange( UText *inText,
- int32_t rangeStart,
- int32_t rangeEnd,
-@@ -1192,7 +1359,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
- if (U_FAILURE(status)) {
- return 0;
- }
--
-+
- UnicodeString fragment;
- UnicodeString normalizedFragment;
- for (int32_t srcI = 0; srcI < inString.length();) { // Once per normalization chunk
-@@ -1261,7 +1428,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
- }
- }
- }
--
-+
- // bestSnlp[i] is the snlp of the best segmentation of the first i
- // code points in the range to be matched.
- UVector32 bestSnlp(numCodePts + 1, status);
-@@ -1271,7 +1438,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
- }
-
-
-- // prev[i] is the index of the last CJK code point in the previous word in
-+ // prev[i] is the index of the last CJK code point in the previous word in
- // the best segmentation of the first i characters.
- UVector32 prev(numCodePts + 1, status);
- for(int32_t i = 0; i <= numCodePts; i++){
-@@ -1305,8 +1472,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
- // Note: lengths is filled with code point lengths
- // The NULL parameter is the ignored code unit lengths.
-
-- // if there are no single character matches found in the dictionary
-- // starting with this charcter, treat character as a 1-character word
-+ // if there are no single character matches found in the dictionary
-+ // starting with this charcter, treat character as a 1-character word
- // with the highest value possible, i.e. the least likely to occur.
- // Exclude Korean characters from this treatment, as they should be left
- // together by default.
-@@ -1380,7 +1547,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
- numBreaks++;
- }
-
-- // Now that we're done, convert positions in t_boundary[] (indices in
-+ // Now that we're done, convert positions in t_boundary[] (indices in
- // the normalized input string) back to indices in the original input UText
- // while reversing t_boundary and pushing values to foundBreaks.
- for (int32_t i = numBreaks-1; i >= 0; i--) {
diff --git a/source/common/dictbe.h b/source/common/dictbe.h
index d3488cd..26caa75 100644
--- misc/icu/source/common/dictbe.h
diff --git a/external/icu/khmerdict.dict b/external/icu/khmerdict.dict
index c935cd088659..52605b65469d 100644
Binary files a/external/icu/khmerdict.dict and b/external/icu/khmerdict.dict differ
commit 15b4bad58196d19239d1dff615fa61fe7f15a07f
Author: Martin Hosken <martin_hosken at sil.org>
Date: Fri Jan 8 16:41:52 2016 +0700
Fix applying external dict to icu, and khmer break engine fixes
Change-Id: Ib897e5fa5e80f75f501694dbf874aabd92253b25
Reviewed-on: https://gerrit.libreoffice.org/21247
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Martin Hosken <martin_hosken at sil.org>
(cherry picked from commit 39b718dd655220110523b7013e65ea4f821aedf7)
diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
index 03e6079b19f0..ba3e392a27f3 100644
--- a/external/icu/khmerbreakengine.patch
+++ b/external/icu/khmerbreakengine.patch
@@ -14,8 +14,8 @@ index f1c874d..3ad1b3f 100644
fTypes = breakTypes;
+ fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
+
-+ fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]"), status);
-+ fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]"), status);
++ fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]\\u200C\\u200D\\u2060"), status);
++ fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]\\u200C\\u200D\\u2060"), status);
+ fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
}
@@ -473,7 +473,7 @@ index f1c874d..3ad1b3f 100644
// Look ahead for possible suffixes if a dictionary word does not follow.
// We do this in code rather than using a rule so that the heuristic
// resynch continues to function. For example, one of the suffix characters
-@@ -828,51 +993,29 @@ foundBest:
+@@ -828,51 +993,28 @@ foundBest:
* KhmerBreakEngine
*/
@@ -506,7 +506,7 @@ index f1c874d..3ad1b3f 100644
setCharacters(fKhmerWordSet);
}
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
- fMarkSet.add(0x0020);
+- fMarkSet.add(0x0020);
- fEndWordSet = fKhmerWordSet;
- fBeginWordSet.add(0x1780, 0x17B3);
- //fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels
@@ -522,7 +522,7 @@ index f1c874d..3ad1b3f 100644
-// fSuffixSet.add(THAI_MAIYAMOK);
+ fIgnoreSet.add(0x2060); // WJ
+ fIgnoreSet.add(0x200C, 0x200D); // ZWJ, ZWNJ
-+ fBaseSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:^M:]]"), status);
++ fBaseSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:lb=SA:]&[:^M:]]"), status);
+ fPuncSet.applyPattern(UNICODE_STRING_SIMPLE("[\\u17D4\\u17D5\\u17D6\\u17D7\\u17D9:]"), status);
// Compact for caching.
@@ -750,7 +750,7 @@ index f1c874d..3ad1b3f 100644
- if (cuWordLength <= 0) {
- wordsFound += 1;
- }
-+ } while (fMarkSet.contains(c));
++ } while (fMarkSet.contains(c) || fIgnoreSet.contains(c));
+ values.setElementAt(BADSNLP, count);
+ lengths.setElementAt(utext_getNativeIndex(text) - currix, count++);
+ } else {
@@ -775,7 +775,7 @@ index f1c874d..3ad1b3f 100644
- else {
- // Back up to where we were for next iteration
- utext_setNativeIndex(text, current+cuWordLength);
-+ int32_t ln_j_i = ln + i;
++ int32_t ln_j_i = ln + i; // yes really i!
+ if (newSnlp < bestSnlp.elementAti(ln_j_i)) {
+ if (v == BADSNLP) {
+ int32_t p = prev.elementAti(i);
@@ -1395,7 +1395,7 @@ index 816c82d..c637d70 100644
+#$(MAINBUILDDIR)/khmerdict.stamp: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(BRKSRCDIR)/khmerdict.txt build-local
+# $(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
+$(MAINBUILDDIR)/khmerdict.stamp: $(BRKSRCDIR)/khmerdict.dict build-local
-+ cp $< $(MAINBUILDDIR)
++ cp $< $(BRKBLDDIR)
+ echo "timestamp" > $@
#################################################### CFU
commit 85d5174a862c78561c4cf85aa7c6ef2ba99d5352
Author: Martin Hosken <martin_hosken at sil.org>
Date: Sat Dec 12 11:36:53 2015 +0700
Use .dict files since below the 500K limit
Change-Id: Iec71ad4918cd333f0a44d372017ecee300e3aca9
Reviewed-on: https://gerrit.libreoffice.org/20748
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Martin Hosken <martin_hosken at sil.org>
(cherry picked from commit fbb00383d82da5ce375f1b034d3fb9ebdd9a8f0e)
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index c48d02556d2d..4a6a11477af3 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -27,6 +27,9 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
external/icu/clang-cl.patch.0 \
$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \
$(if $(filter EMSCRIPTEN,$(OS)),external/icu/icu4c-emscripten.patch.1) \
+ external/icu/khmerbreakengine.patch \
))
+$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
+
# vim: set noet sw=4 ts=4:
diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
new file mode 100644
index 000000000000..03e6079b19f0
--- /dev/null
+++ b/external/icu/khmerbreakengine.patch
@@ -0,0 +1,1403 @@
+diff --git a/source/common/dictbe.cpp b/source/common/dictbe.cpp
+index f1c874d..3ad1b3f 100644
+--- misc/icu/source/common/dictbe.cpp
++++ build/icu/source/common/dictbe.cpp
+@@ -27,8 +27,16 @@ U_NAMESPACE_BEGIN
+ ******************************************************************
+ */
+
+-DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) {
++DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) :
++ clusterLimit(3)
++{
++ UErrorCode status = U_ZERO_ERROR;
+ fTypes = breakTypes;
++ fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
++
++ fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]"), status);
++ fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]"), status);
++ fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
+ }
+
+ DictionaryBreakEngine::~DictionaryBreakEngine() {
+@@ -90,7 +98,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
+ result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
+ utext_setNativeIndex(text, current);
+ }
+-
++
+ return result;
+ }
+
+@@ -101,6 +109,163 @@ DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
+ fSet.compact();
+ }
+
++bool
++DictionaryBreakEngine::scanBeforeStart(UText *text, int32_t& start, bool &doBreak) const {
++ UErrorCode status = U_ZERO_ERROR;
++ UText* ut = utext_clone(NULL, text, false, true, &status);
++ utext_setNativeIndex(ut, start);
++ UChar32 c = utext_current32(ut);
++ bool res = false;
++ doBreak = true;
++ while (start >= 0) {
++ if (!fSkipStartSet.contains(c)) {
++ res = (c == ZWSP);
++ break;
++ }
++ --start;
++ c = utext_previous32(ut);
++ doBreak = false;
++ }
++ utext_close(ut);
++ return res;
++}
++
++bool
++DictionaryBreakEngine::scanAfterEnd(UText *text, int32_t textEnd, int32_t& end, bool &doBreak) const {
++ UErrorCode status = U_ZERO_ERROR;
++ UText* ut = utext_clone(NULL, text, false, true, &status);
++ utext_setNativeIndex(ut, end);
++ UChar32 c = utext_current32(ut);
++ bool res = false;
++ doBreak = !fNBeforeSet.contains(c);
++ while (end < textEnd) {
++ if (!fSkipEndSet.contains(c)) {
++ res = (c == ZWSP);
++ break;
++ }
++ ++end;
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list