[Libreoffice-commits] core.git: Branch 'distro/collabora/cp-5.1' - 46 commits - configure.ac download.lst external/icu filter/source hwpfilter/qa hwpfilter/source i18npool/source lotuswordpro/source sc/inc sc/source sd/source svl/source svx/source sw/source vcl/unx writerfilter/source

Andras Timar andras.timar at collabora.com
Sun Apr 23 18:54:09 UTC 2017


 configure.ac                                                   |    2 
 download.lst                                                   |    2 
 external/icu/UnpackedTarball_icu.mk                            |    5 
 external/icu/clang-cl.patch.0                                  |   26 
 external/icu/icu4c-changeset-39671.patch.1                     |  189 +
 external/icu/khmerbreakengine.patch                            | 1110 ++++++++++
 external/icu/khmerdict.dict                                    |binary
 filter/source/graphicfilter/icgm/cgm.cxx                       |    8 
 filter/source/graphicfilter/icgm/class1.cxx                    |   17 
 filter/source/graphicfilter/icgm/class4.cxx                    |   25 
 filter/source/graphicfilter/itiff/itiff.cxx                    |   65 
 filter/source/msfilter/msdffimp.cxx                            |   48 
 filter/source/msfilter/svdfppt.cxx                             |   45 
 hwpfilter/qa/cppunit/data/fail/cslist-1.hwp                    |binary
 hwpfilter/source/datecode.h                                    |   23 
 hwpfilter/source/drawing.h                                     |    4 
 hwpfilter/source/fontmap.cxx                                   |   20 
 hwpfilter/source/formula.cxx                                   |    9 
 hwpfilter/source/formula.h                                     |    2 
 hwpfilter/source/grammar.cxx                                   |   35 
 hwpfilter/source/hbox.cxx                                      |   37 
 hwpfilter/source/hbox.h                                        |  101 
 hwpfilter/source/hcode.cxx                                     |    2 
 hwpfilter/source/hfont.cxx                                     |   14 
 hwpfilter/source/hfont.h                                       |    4 
 hwpfilter/source/hgzip.cxx                                     |    8 
 hwpfilter/source/hgzip.h                                       |    2 
 hwpfilter/source/hinfo.cxx                                     |   77 
 hwpfilter/source/hinfo.h                                       |    6 
 hwpfilter/source/hiodev.cxx                                    |   50 
 hwpfilter/source/hiodev.h                                      |   22 
 hwpfilter/source/hpara.cxx                                     |  139 -
 hwpfilter/source/hpara.h                                       |   12 
 hwpfilter/source/hstyle.cxx                                    |    7 
 hwpfilter/source/hstyle.h                                      |    2 
 hwpfilter/source/htags.cxx                                     |   27 
 hwpfilter/source/htags.h                                       |    6 
 hwpfilter/source/hwpeq.cxx                                     |    4 
 hwpfilter/source/hwpfile.cxx                                   |  176 -
 hwpfilter/source/hwpfile.h                                     |   31 
 hwpfilter/source/hwplib.h                                      |    9 
 hwpfilter/source/hwpread.cxx                                   |   11 
 hwpfilter/source/hwpreader.cxx                                 |  704 +++---
 hwpfilter/source/hwpreader.hxx                                 |    8 
 hwpfilter/source/lexer.cxx                                     |   11 
 hwpfilter/source/list.hxx                                      |  146 -
 hwpfilter/source/mapping.h                                     |    2 
 hwpfilter/source/mzstring.cxx                                  |   39 
 hwpfilter/source/mzstring.h                                    |    6 
 hwpfilter/source/nodes.h                                       |    2 
 hwpfilter/source/solver.cxx                                    |   53 
 i18npool/source/breakiterator/breakiterator_unicode.cxx        |    2 
 lotuswordpro/source/filter/lwpframelayout.cxx                  |    7 
 lotuswordpro/source/filter/lwpframelayout.hxx                  |    1 
 lotuswordpro/source/filter/lwpgrfobj.cxx                       |    8 
 lotuswordpro/source/filter/lwpobjstrm.cxx                      |    9 
 lotuswordpro/source/filter/lwpobjstrm.hxx                      |    1 
 sc/inc/refdata.hxx                                             |    2 
 sc/source/core/data/grouptokenconverter.cxx                    |    4 
 sc/source/core/tool/interpr4.cxx                               |   25 
 sc/source/core/tool/interpr6.cxx                               |    6 
 sc/source/core/tool/refdata.cxx                                |    5 
 sc/source/core/tool/token.cxx                                  |   29 
 sc/source/filter/oox/condformatcontext.cxx                     |    4 
 sc/source/ui/StatisticsDialogs/RandomNumberGeneratorDialog.cxx |    2 
 sc/source/ui/docshell/docfunc.cxx                              |    2 
 sc/source/ui/undo/undoblk.cxx                                  |    2 
 sc/source/ui/vba/vbarange.cxx                                  |   16 
 sc/source/ui/view/preview.cxx                                  |    6 
 sd/source/filter/ppt/pptin.cxx                                 |   11 
 sd/source/filter/ppt/pptin.hxx                                 |   10 
 sd/source/ui/view/drviews3.cxx                                 |  351 +--
 svl/source/numbers/zforscan.cxx                                |   23 
 svx/source/table/tablelayouter.cxx                             |   31 
 sw/source/core/objectpositioning/anchoredobjectposition.cxx    |    8 
 sw/source/filter/ww8/docxattributeoutput.cxx                   |    7 
 sw/source/filter/ww8/ww8graf.cxx                               |   10 
 sw/source/filter/ww8/ww8par.hxx                                |    2 
 sw/source/filter/ww8/ww8par2.cxx                               |   44 
 sw/source/filter/ww8/ww8par6.cxx                               |    2 
 sw/source/filter/ww8/ww8scan.cxx                               |    5 
 sw/source/filter/ww8/ww8struc.hxx                              |    2 
 sw/source/uibase/dbui/dbmgr.cxx                                |    7 
 vcl/unx/gtk/a11y/atkutil.cxx                                   |    9 
 writerfilter/source/dmapper/DomainMapper_Impl.cxx              |   16 
 85 files changed, 2612 insertions(+), 1410 deletions(-)

New commits:
commit e98ddef974d860c153958c517fa19b5a03033638
Author: Andras Timar <andras.timar at collabora.com>
Date:   Sun Apr 23 15:35:27 2017 +0200

    hwpfilter from libreoffice-5-2 (ofz fixes and other fixes)
    
    Change-Id: If40942f38ab3536257d7e58d5630136307930cac

diff --git a/hwpfilter/source/datecode.h b/hwpfilter/source/datecode.h
index 89507b419157..e35b6e50a58f 100644
--- a/hwpfilter/source/datecode.h
+++ b/hwpfilter/source/datecode.h
@@ -24,28 +24,7 @@ static const hchar defaultform[] =
 {
     '1', 0x9165, 32, '2', 0xB6A9, 32, '3', 0xB7A9, 0
 };
-#ifdef _DATECODE_WEEK_DEFINES_
-static const hchar kor_week[] =
-{
-    0xB7A9, 0xB6A9, 0xD1C1, 0xAE81, 0xA1A2, 0x8B71, 0xC9A1
-};
-static const hchar china_week[] =
-{
-    0x4CC8, 0x4BE4, 0x525A, 0x48D8, 0x45AB, 0x4270, 0x50B4
-};
-static const char eng_week[] = { "SunMonTueWedThuFriSat" };
-static const char eng_mon[] = { "JanFebMarAprMayJunJulAugSepOctNovDec" };
-static const char * const en_mon[] =
-{
-    "January", "February", "March", "April", "May", "June", "July",
-    "August", "September", "October", "November", "December"
-};
-static const char * const en_week[] =
-{
-    "Sunday", "Monday", "Tuesday", "Wednesday",
-    "Thursday", "Friday", "Saturday"
-};
-#endif //_DATECODE_WEEK_DEFINES_
+
 #endif
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/fontmap.cxx b/hwpfilter/source/fontmap.cxx
index 259722ee791d..8de154283ba1 100644
--- a/hwpfilter/source/fontmap.cxx
+++ b/hwpfilter/source/fontmap.cxx
@@ -117,8 +117,15 @@ const struct FontEntry FontMapTab[] =
     {"\xbd\xc5\xb8\xed \xb1\xc3\xbc\xad",3, 0.97}
 };
 
-#ifndef WIN32
-#if defined(LINUX)
+#if defined(_WIN32)
+const char* RepFontTab[] =
+{
+    "\xb9\xd9\xc5\xc1",                                       /* 0 */
+    "\xb5\xb8\xbf\xf2",                                       /* 1 */
+    "\xb1\xbc\xb8\xb2",                                       /* 2 */
+    "\xb1\xc3\xbc\xad"                                        /* 3 */
+};
+#elif defined(LINUX)
 const char* RepFontTab[] =
 {
     "\xb9\xe9\xb9\xac \xb9\xd9\xc5\xc1",                                     /* 0 */
@@ -135,15 +142,6 @@ const char* RepFontTab[] =
     "Gungso"                                      /* 3 */
 };
 #endif
-#else
-const char* RepFontTab[] =
-{
-    "\xb9\xd9\xc5\xc1",                                       /* 0 */
-    "\xb5\xb8\xbf\xf2",                                       /* 1 */
-    "\xb1\xbc\xb8\xb2",                                       /* 2 */
-    "\xb1\xc3\xbc\xad"                                        /* 3 */
-};
-#endif
 
 int getRepFamilyName(const char* orig, char *buf, double &ratio)
 {
diff --git a/hwpfilter/source/formula.cxx b/hwpfilter/source/formula.cxx
index d12c6cbce0ab..3b114f783f72 100644
--- a/hwpfilter/source/formula.cxx
+++ b/hwpfilter/source/formula.cxx
@@ -178,9 +178,8 @@ void Formula::makeExpr(Node *res)
              break;
          case ID_BLOCK:
              makeBlock(tmp);
-             //fall-through
+             break;
          case ID_BEGIN:
-             //fall-through
          case ID_END:
              break;
     }
@@ -568,10 +567,10 @@ void Formula::makeBlock(Node *res)
 #endif
 }
 
-int Formula::parse()
+void Formula::parse()
 {
      Node *res = nullptr;
-     if( !eq ) return 0;
+     if( !eq ) return;
      if( isHwpEQ ){
           MzString a;
          // fprintf(stderr,"\n\n[BEFORE]\n[%s]\n",eq);
@@ -627,8 +626,6 @@ int Formula::parse()
          nodelist.pop_front();
          delete tmpNode;
      }
-
-     return 0;
 }
 
 void Formula::trim()
diff --git a/hwpfilter/source/formula.h b/hwpfilter/source/formula.h
index 22c53f6a4d6f..a4cddc2e8c8a 100644
--- a/hwpfilter/source/formula.h
+++ b/hwpfilter/source/formula.h
@@ -55,7 +55,7 @@ public:
         pList = p;
         rList = static_cast<XAttributeList *>(pList);
     }
-    int parse();
+    void parse();
 private:
      void trim();
      void makeMathML(Node *res);
diff --git a/hwpfilter/source/grammar.cxx b/hwpfilter/source/grammar.cxx
index 52ce22005c41..14751da6fbf7 100644
--- a/hwpfilter/source/grammar.cxx
+++ b/hwpfilter/source/grammar.cxx
@@ -361,33 +361,6 @@ static const short yycheck[] = {    11,
   It was written by Richard Stallman by simplifying the hairy parser
   used when %semantic_parser is specified.  */
 
-#ifndef YYSTACK_USE_ALLOCA
-#ifdef alloca
-#define YYSTACK_USE_ALLOCA
-#else /* alloca not defined */
-#ifdef __GNUC__
-#define YYSTACK_USE_ALLOCA
-#define alloca __builtin_alloca
-#else /* not GNU C.  */
-#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || (defined (__sun) && defined (__i386))
-#define YYSTACK_USE_ALLOCA
-#include <sal/alloca.h>
-#else /* not sparc */
-#if defined(_AIX)
- #pragma alloca
-#define YYSTACK_USE_ALLOCA
-#endif /* not _AIX */
-#endif /* not sparc */
-#endif /* not GNU C */
-#endif /* alloca not defined */
-#endif /* YYSTACK_USE_ALLOCA not defined */
-
-#ifdef YYSTACK_USE_ALLOCA
-#define YYSTACK_ALLOC alloca
-#else
-#define YYSTACK_ALLOC malloc
-#endif
-
 /* Note: there must be only one dollar sign in this file.
    It is replaced by the list of actions, each action
    as one case of the switch.  */
@@ -611,17 +584,15 @@ yynewstate:
       yystacksize *= 2;
       if (yystacksize > YYMAXDEPTH)
     yystacksize = YYMAXDEPTH;
-#ifndef YYSTACK_USE_ALLOCA
       yyfree_stacks = 1;
-#endif
-      yyss = static_cast<short *>(YYSTACK_ALLOC (yystacksize * sizeof (*yyssp)));
+      yyss = static_cast<short *>(malloc (yystacksize * sizeof (*yyssp)));
       memcpy (yyss, yyss1,
            size * (unsigned int) sizeof (*yyssp));
-      yyvs = static_cast<YYSTYPE *>(YYSTACK_ALLOC (yystacksize * sizeof (*yyvsp)));
+      yyvs = static_cast<YYSTYPE *>(malloc (yystacksize * sizeof (*yyvsp)));
       memcpy (yyvs, yyvs1,
            size * (unsigned int) sizeof (*yyvsp));
 #ifdef YYLSP_NEEDED
-      yyls = (YYLTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yylsp));
+      yyls = (YYLTYPE *) malloc (yystacksize * sizeof (*yylsp));
       memcpy ((char *)yyls, (char *)yyls1,
            size * (unsigned int) sizeof (*yylsp));
 #endif
diff --git a/hwpfilter/source/hbox.cxx b/hwpfilter/source/hbox.cxx
index cf027d7091f8..6b39021cf2b4 100644
--- a/hwpfilter/source/hbox.cxx
+++ b/hwpfilter/source/hbox.cxx
@@ -133,9 +133,29 @@ DateCode::DateCode()
 {
 }
 
-#define _DATECODE_WEEK_DEFINES_
 #include "datecode.h"
 
+static const hchar kor_week[] =
+{
+    0xB7A9, 0xB6A9, 0xD1C1, 0xAE81, 0xA1A2, 0x8B71, 0xC9A1
+};
+static const hchar china_week[] =
+{
+    0x4CC8, 0x4BE4, 0x525A, 0x48D8, 0x45AB, 0x4270, 0x50B4
+};
+static const char eng_week[] = { "SunMonTueWedThuFriSat" };
+static const char eng_mon[] = { "JanFebMarAprMayJunJulAugSepOctNovDec" };
+static const char * const en_mon[] =
+{
+    "January", "February", "March", "April", "May", "June", "July",
+    "August", "September", "October", "November", "December"
+};
+static const char * const en_week[] =
+{
+    "Sunday", "Monday", "Tuesday", "Wednesday",
+    "Thursday", "Friday", "Saturday"
+};
+
 hchar_string DateCode::GetString()
 {
     hchar_string ret;
@@ -324,7 +344,7 @@ TxtBox::TxtBox()
     , dummy(0)
     , dummy1(0)
     , cap_len(0)
-    , next(0)
+    , next_box(0)
     , dummy2(0)
     , reserved1(0)
     , cap_pos(0)
@@ -398,12 +418,6 @@ Picture::~Picture()
 }
 
 
-int Picture::Type()
-{
-    return pictype;
-}
-
-
 // line(14)
 // hidden(15)
 Hidden::~Hidden()
diff --git a/hwpfilter/source/hbox.h b/hwpfilter/source/hbox.h
index 87f972cda856..69e14f638e15 100644
--- a/hwpfilter/source/hbox.h
+++ b/hwpfilter/source/hbox.h
@@ -331,7 +331,7 @@ struct TxtBox: public FBox
 
     short     dummy1;                             // to not change structure size */
     short     cap_len;
-    short     next;
+    short     next_box;
     short     dummy2;                             // to not change structure size */
     unsigned char reserved1;
 /**
@@ -375,11 +375,6 @@ struct TxtBox: public FBox
  * @returns Count of cell.
  */
     int NCell()   { return nCell; }
-/**
- * This is one of table, text-box, equalizer and button
- * @returns Type of this object.
- */
-    int Type()    { return type;  }
 
     virtual bool Read(HWPFile &hwpf) override;
 };
@@ -649,7 +644,6 @@ struct Picture: public FBox
     Picture();
     virtual ~Picture();
 
-    int   Type    ();
     virtual bool Read    (HWPFile &hwpf) override;
 };
 
diff --git a/hwpfilter/source/hcode.cxx b/hwpfilter/source/hcode.cxx
index 103b343590aa..271eb9dd4209 100644
--- a/hwpfilter/source/hcode.cxx
+++ b/hwpfilter/source/hcode.cxx
@@ -473,7 +473,7 @@ static hchar lineCharConv(hchar ch)
         case 0x3060 + '\'' - 31:
         case 0x3060 + '\"' - 31:
             ch--;
-
+            SAL_FALLTHROUGH;
         case 0x3060 + '\'' - 32:
         case 0x3060 + '\"' - 32:
         case 0x3060 + '{' - 32:
diff --git a/hwpfilter/source/hfont.cxx b/hwpfilter/source/hfont.cxx
index 72746f23467e..70e394282abc 100644
--- a/hwpfilter/source/hfont.cxx
+++ b/hwpfilter/source/hfont.cxx
@@ -43,18 +43,17 @@ HWPFont::~HWPFont()
 }
 
 
-int HWPFont::AddFont(int lang, const char *font)
+void HWPFont::AddFont(int lang, const char *font)
 {
     int nfonts;
 
     if (!(lang >= 0 && lang < NLanguage))
-        return 0;
+        return;
     nfonts = nFonts[lang];
     if (MAXFONTS <= nfonts)
-        return 0;
+        return;
     strncpy(fontnames[lang] + FONTNAMELEN * nfonts, font, FONTNAMELEN - 1);
     nFonts[lang]++;
-    return nfonts;
 }
 
 
@@ -70,7 +69,7 @@ const char *HWPFont::GetFontName(int lang, int id)
 
 static char buffer[FONTNAMELEN];
 
-bool HWPFont::Read(HWPFile & hwpf)
+void HWPFont::Read(HWPFile & hwpf)
 {
     int lang = 0;
     short nfonts = 0;
@@ -81,7 +80,8 @@ bool HWPFont::Read(HWPFile & hwpf)
         hwpf.Read2b(&nfonts, 1);
         if (!(nfonts > 0 && nfonts < MAXFONTS))
         {
-            return !hwpf.SetState(HWP_InvalidFileFormat);
+            (void)hwpf.SetState(HWP_InvalidFileFormat);
+            return;
         }
         fontnames[lang] = new char[nfonts * FONTNAMELEN];
 
@@ -92,8 +92,6 @@ bool HWPFont::Read(HWPFile & hwpf)
             AddFont(lang, buffer);
         }
     }
-
-    return !hwpf.State();
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/hfont.h b/hwpfilter/source/hfont.h
index 1ed8fa6bb3cd..074346f8f892 100644
--- a/hwpfilter/source/hfont.h
+++ b/hwpfilter/source/hfont.h
@@ -54,7 +54,7 @@ class DLLEXPORT HWPFont
  * @param lang Language index
  * @param font Name of font family
  */
-        int       AddFont( int lang, const char *font );
+        void       AddFont( int lang, const char *font );
 /**
  * @param lang Language index
  * @param id Index of font
@@ -62,7 +62,7 @@ class DLLEXPORT HWPFont
  */
         const char    *GetFontName( int lang, int id );
 
-        bool Read( HWPFile &hwpf );
+        void Read( HWPFile &hwpf );
 };
 #endif                                            /* _HWPFONTS+H_ */
 
diff --git a/hwpfilter/source/hinfo.cxx b/hwpfilter/source/hinfo.cxx
index 2ca1714045f1..2e5d4b1c9a16 100644
--- a/hwpfilter/source/hinfo.cxx
+++ b/hwpfilter/source/hinfo.cxx
@@ -75,7 +75,7 @@ HWPInfo::~HWPInfo()
  * Function for reading document information (128 bytes)
  * Document information is the information after the file identification information (30 bytes).
  */
-bool HWPInfo::Read(HWPFile & hwpf)
+void HWPInfo::Read(HWPFile & hwpf)
 {
     hwpf.Read2b(&cur_col, 1);                     /* When a document is saving, the paragraph number where the coursor is */
     hwpf.Read2b(&cur_row, 1);                     /* Paragraphs rows */
@@ -86,31 +86,31 @@ bool HWPInfo::Read(HWPFile & hwpf)
 // paper geometry information
     unsigned short tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.paper_height = tmp16;                   /* Paper length */
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.paper_width = tmp16;                    /* Sheet width */
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.top_margin = tmp16;                     /* Top margin */
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.bottom_margin = tmp16;                  /* The bottom margin */
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.left_margin = tmp16;                    /* Left Margin */
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.right_margin = tmp16;                   /* Right margins */
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.header_length = tmp16;                  /* Header length */
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.footer_length = tmp16;                  /* Footer length */
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     paper.gutter_length = tmp16;                  /* The binding margin */
     hwpf.Read2b(&readonly, 1);                    /* Reserve */
     hwpf.Read1b(reserved1, 4);                    /* Reserve */
@@ -129,22 +129,22 @@ bool HWPInfo::Read(HWPFile & hwpf)
     hwpf.Read2b(&countfn,1);                      /* Number of footnote */
 
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     splinetext = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     splinefn = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     spfnfn = tmp16;
     hwpf.Read1b(&fnchar, 1);
     hwpf.Read1b(&fnlinetype, 1);
 // border layout
-    for (int ii = 0; ii < 4; ++ii)
+    for (int & ii : bordermargin)
     {
         if (!hwpf.Read2b(tmp16))
-            return false;
-        bordermargin[ii] = tmp16;
+            return;
+        ii = tmp16;
     }
     hwpf.Read2b(&borderline, 1);
 
@@ -156,17 +156,17 @@ bool HWPInfo::Read(HWPFile & hwpf)
 
     hwpf.Read2b(&info_block_len, 1);
     if (hwpf.State())
-        return false;
+        return;
 
 /* Read the article summary. */
     if (!summary.Read(hwpf))
-        return false;
+        return;
     if (info_block_len > 0)
     {
         info_block = new unsigned char[info_block_len + 1];
 
         if (!HWPReadInfoBlock(info_block, info_block_len, hwpf))
-            return false;
+            return;
     }
 
 /* reset the value of hwpf. */
@@ -174,8 +174,6 @@ bool HWPInfo::Read(HWPFile & hwpf)
     hwpf.encrypted = encrypted != 0;
     hwpf.info_block_len = info_block_len;
     hwpf.SetCompressed(hwpf.compressed);
-
-    return (!hwpf.State());
 }
 
 
@@ -197,64 +195,63 @@ bool HWPSummary::Read(HWPFile & hwpf)
 }
 
 
-bool ParaShape::Read(HWPFile & hwpf)
+void ParaShape::Read(HWPFile & hwpf)
 {
     pagebreak = 0;
     unsigned short tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     left_margin = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     right_margin = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     indent = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     lspacing = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     pspacing_next = tmp16;
 
     hwpf.Read1b(&condense, 1);
     hwpf.Read1b(&arrange_type, 1);
-    for (int ii = 0; ii < MAXTABS; ii++)
+    for (TabSet & tab : tabs)
     {
-        hwpf.Read1b(&tabs[ii].type, 1);
-        hwpf.Read1b(&tabs[ii].dot_continue, 1);
+        hwpf.Read1b(&tab.type, 1);
+        hwpf.Read1b(&tab.dot_continue, 1);
         if (!hwpf.Read2b(tmp16))
-            return false;
-        tabs[ii].position = tmp16;
+            return;
+        tab.position = tmp16;
     }
     hwpf.Read1b(&coldef.ncols, 1);
     hwpf.Read1b(&coldef.separator, 1);
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     coldef.spacing = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     coldef.columnlen = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     coldef.columnlen0 = tmp16;
     hwpf.Read1b(&shade, 1);
     hwpf.Read1b(&outline, 1);
     hwpf.Read1b(&outline_continue, 1);
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     pspacing_prev = tmp16;
 
     hwpf.Read1b(reserved, 2);
-    return (!hwpf.State());
 }
 
 
-bool CharShape::Read(HWPFile & hwpf)
+void CharShape::Read(HWPFile & hwpf)
 {
     unsigned short tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     size = tmp16;
     hwpf.Read1b(font, NLanguage);
     hwpf.Read1b(ratio, NLanguage);
@@ -263,8 +260,6 @@ bool CharShape::Read(HWPFile & hwpf)
     hwpf.Read1b(&shade, 1);
     hwpf.Read1b(&attr, 1);
     hwpf.Read1b(reserved, 4);
-
-    return (!hwpf.State());
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/hinfo.h b/hwpfilter/source/hinfo.h
index 565e6a1cb54f..b080150e30c8 100644
--- a/hwpfilter/source/hinfo.h
+++ b/hwpfilter/source/hinfo.h
@@ -205,7 +205,7 @@ class DLLEXPORT HWPInfo
         HWPInfo(void);
         ~HWPInfo(void);
 
-        bool Read(HWPFile &hwpf);
+        void Read(HWPFile &hwpf);
 
 };
 
@@ -231,7 +231,7 @@ struct CharShape
     unsigned char attr;
     unsigned char reserved[4];
 
-    bool Read(HWPFile &);
+    void Read(HWPFile &);
 };
 
 /* ?? ?????? ???? ?????? */
@@ -284,7 +284,7 @@ struct ParaShape
     CharShape *cshape;
      unsigned char pagebreak;
 
-    bool  Read(HWPFile &);
+    void  Read(HWPFile &);
 //  virtual ~ParaShape();
 };
 #endif // INCLUDED_HWPFILTER_SOURCE_HINFO_H
diff --git a/hwpfilter/source/hiodev.cxx b/hwpfilter/source/hiodev.cxx
index 677538809760..6ae96117a25c 100644
--- a/hwpfilter/source/hiodev.cxx
+++ b/hwpfilter/source/hiodev.cxx
@@ -20,7 +20,7 @@
 #include <stdio.h>
 #include <errno.h>
 
-#ifdef WIN32
+#ifdef _WIN32
 # include <io.h>
 #else
 # include <unistd.h>
diff --git a/hwpfilter/source/hpara.cxx b/hwpfilter/source/hpara.cxx
index bbac6da6844c..cce909ead16e 100644
--- a/hwpfilter/source/hpara.cxx
+++ b/hwpfilter/source/hpara.cxx
@@ -29,29 +29,29 @@
 #include "hbox.h"
 #include "hutil.h"
 
-bool LineInfo::Read(HWPFile & hwpf, HWPPara *pPara)
+void LineInfo::Read(HWPFile & hwpf, HWPPara *pPara)
 {
     if (!hwpf.Read2b(pos))
-        return false;
+        return;
     unsigned short tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     space_width = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     height = tmp16;
 // internal information
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     pgy = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     sx = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     psx = tmp16;
     if (!hwpf.Read2b(tmp16))
-        return false;
+        return;
     pex = tmp16;
     height_sp = 0;
 
@@ -62,8 +62,6 @@ bool LineInfo::Read(HWPFile & hwpf, HWPPara *pPara)
         pPara->pshape.reserved[0] = sal::static_int_cast<unsigned char>(pex & 0x01);
         pPara->pshape.reserved[1] = sal::static_int_cast<unsigned char>(pex & 0x02);
     }
-
-    return (!hwpf.State());
 }
 
 HWPPara::HWPPara()
@@ -77,18 +75,16 @@ HWPPara::HWPPara()
     , etcflag(0)
     , ctrlflag(0)
     , pstyno(0)
-    , pno(0)
+    , cshape(new CharShape)
     , linfo(nullptr)
-    , cshapep(nullptr)
 {
-    memset(&cshape, 0, sizeof(cshape));
+    memset(cshape.get(), 0, sizeof(CharShape));
     memset(&pshape, 0, sizeof(pshape));
 }
 
 HWPPara::~HWPPara()
 {
     delete[] linfo;
-    delete[] cshapep;
 }
 
 bool HWPPara::Read(HWPFile & hwpf, unsigned char flag)
@@ -105,18 +101,17 @@ bool HWPPara::Read(HWPFile & hwpf, unsigned char flag)
     hwpf.Read4b(&ctrlflag, 1);
     hwpf.Read1b(&pstyno, 1);
 
-
 /* Paragraph representative character */
-    cshape.Read(hwpf);
+    cshape->Read(hwpf);
     if (nch > 0)
-        hwpf.AddCharShape(&cshape);
+        hwpf.AddCharShape(cshape);
 
 /* Paragraph paragraphs shape  */
     if (nch && !reuse_shape)
     {
         pshape.Read(hwpf);
-        pshape.cshape = &cshape;
-          pshape.pagebreak = etcflag;
+        pshape.cshape = cshape.get();
+        pshape.pagebreak = etcflag;
     }
 
     linfo = ::comphelper::newArray_null<LineInfo>(nline);
@@ -147,23 +142,19 @@ bool HWPPara::Read(HWPFile & hwpf, unsigned char flag)
 
     if (contain_cshape)
     {
-        cshapep = ::comphelper::newArray_null<CharShape>(nch);
-        if (!cshapep)
-        {
-            perror("Memory Allocation: cshape\n");
-            return false;
-        }
-        memset(cshapep, 0, nch * sizeof(CharShape));
+        cshapep.resize(nch);
 
         for (ii = 0; ii < nch; ii++)
         {
+            cshapep[ii].reset(new CharShape);
+            memset(cshapep[ii].get(), 0, sizeof(CharShape));
 
             hwpf.Read1b(&same_cshape, 1);
             if (!same_cshape)
             {
-                cshapep[ii].Read(hwpf);
+                cshapep[ii]->Read(hwpf);
                 if (nch > 1)
-                    hwpf.AddCharShape(&cshapep[ii]);
+                    hwpf.AddCharShape(cshapep[ii]);
             }
             else if (ii == 0)
                 cshapep[ii] = cshape;
@@ -188,15 +179,13 @@ bool HWPPara::Read(HWPFile & hwpf, unsigned char flag)
     return nch && !hwpf.State();
 }
 
-
 CharShape *HWPPara::GetCharShape(int pos)
 {
     if (contain_cshape == 0)
-        return &cshape;
-    return cshapep + pos;
+        return cshape.get();
+    return cshapep[pos].get();
 }
 
-
 std::unique_ptr<HBox> HWPPara::readHBox(HWPFile & hwpf)
 {
     std::unique_ptr<HBox> hbox;
diff --git a/hwpfilter/source/hpara.h b/hwpfilter/source/hpara.h
index 62f19982efc6..126a9ece1482 100644
--- a/hwpfilter/source/hpara.h
+++ b/hwpfilter/source/hpara.h
@@ -69,7 +69,7 @@ struct LineInfo
     hunit         height_sp;
     unsigned short    softbreak;                  // column, page, section
 
-    bool  Read(HWPFile &hwpf, HWPPara *para);
+    void  Read(HWPFile &hwpf, HWPPara *para);
 };
 /**
  * It represents the paragraph.
@@ -103,12 +103,11 @@ class DLLEXPORT HWPPara
  */
         unsigned long     ctrlflag;
         unsigned char     pstyno;
-        CharShape     cshape;                     /* When characters are all the same shape */
+        std::shared_ptr<CharShape> cshape;                     /* When characters are all the same shape */
         ParaShape     pshape;                     /* if reuse flag is 0, */
-        int           pno;                        /* then run-time only */
 
         LineInfo      *linfo;
-        CharShape     *cshapep;
+        std::vector<std::shared_ptr<CharShape>>   cshapep;
 /**
  * Box object list
  */
diff --git a/hwpfilter/source/hstyle.cxx b/hwpfilter/source/hstyle.cxx
index 6a67d23818de..6c29a500d2b5 100644
--- a/hwpfilter/source/hstyle.cxx
+++ b/hwpfilter/source/hstyle.cxx
@@ -113,7 +113,7 @@ void HWPStyle::SetParaShape(int n, ParaShape * pshapep)
 }
 
 
-bool HWPStyle::Read(HWPFile & hwpf)
+void HWPStyle::Read(HWPFile & hwpf)
 {
     CharShape cshape;
     ParaShape pshape;
@@ -121,7 +121,7 @@ bool HWPStyle::Read(HWPFile & hwpf)
     hwpf.Read2b(&nstyles, 1);
     style = ::comphelper::newArray_null<StyleData>(nstyles);
     if (!style)
-        return false;
+        return;
 
     for (int ii = 0; ii < nstyles; ii++)
     {
@@ -133,9 +133,8 @@ bool HWPStyle::Read(HWPFile & hwpf)
         SetCharShape(ii, &cshape);
         SetParaShape(ii, &pshape);
         if (hwpf.State())
-            return false;
+            return;
     }
-    return true;
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/hstyle.h b/hwpfilter/source/hstyle.h
index 443102983535..c13523061a74 100644
--- a/hwpfilter/source/hstyle.h
+++ b/hwpfilter/source/hstyle.h
@@ -45,7 +45,7 @@ class DLLEXPORT HWPStyle
         void SetCharShape( int n, CharShape *cshapep );
         void SetParaShape( int n, ParaShape *pshapep );
 
-        bool Read( HWPFile &hwpf );
+        void Read( HWPFile &hwpf );
 };
 #endif
 /* _HWPSTYLE+H_ */
diff --git a/hwpfilter/source/htags.cxx b/hwpfilter/source/htags.cxx
index eb49626baa34..044f7d2a6a61 100644
--- a/hwpfilter/source/htags.cxx
+++ b/hwpfilter/source/htags.cxx
@@ -25,7 +25,7 @@
 #include "hwpfile.h"
 #include "htags.h"
 
-bool HyperText::Read(HWPFile & hwpf)
+void HyperText::Read(HWPFile & hwpf)
 {
     hwpf.Read1b(filename, 256);
     hwpf.Read2b(bookmark, 16);
@@ -41,7 +41,6 @@ bool HyperText::Read(HWPFile & hwpf)
                 break;
         }
     }
-    return true;
 }
 
 
@@ -53,7 +52,7 @@ EmPicture::EmPicture(size_t tsize)
     else
         data = new uchar[size];
 }
-#ifdef WIN32
+#ifdef _WIN32
 #define unlink _unlink
 #endif
 EmPicture::~EmPicture()
@@ -84,14 +83,14 @@ OlePicture::OlePicture(int tsize)
     size = tsize - 4;
     if (size <= 0)
         return;
-#ifndef WIN32
+#ifndef _WIN32
      pis = new char[size];
 #endif
 };
 
 OlePicture::~OlePicture()
 {
-#ifdef WIN32
+#ifdef _WIN32
      if( pis )
           pis->Release();
 #else
@@ -101,21 +100,21 @@ OlePicture::~OlePicture()
 
 #define FILESTG_SIGNATURE_NORMAL 0xF8995568
 
-bool OlePicture::Read(HWPFile & hwpf)
+void OlePicture::Read(HWPFile & hwpf)
 {
     if (size <= 0)
-        return false;
+        return;
 
 // We process only FILESTG_SIGNATURE_NORMAL.
     hwpf.Read4b(&signature, 1);
     if (signature != FILESTG_SIGNATURE_NORMAL)
-        return false;
-#ifdef WIN32
+        return;
+#ifdef _WIN32
     char *data = new char[size];
     if (hwpf.ReadBlock(data,size) == 0)
     {
           delete [] data;
-          return false;
+          return;
     }
     FILE *fp;
     char tname[200];
@@ -124,7 +123,7 @@ bool OlePicture::Read(HWPFile & hwpf)
     if (0 == (fp = fopen(tname, "wb")))
     {
          delete [] data;
-         return false;
+         return;
     }
     fwrite(data, size, 1, fp);
     delete [] data;
@@ -135,15 +134,13 @@ bool OlePicture::Read(HWPFile & hwpf)
                     NULL, 0, &pis) != S_OK ) {
          pis = 0;
          unlink(tname);
-         return false;
+         return;
     }
     unlink(tname);
 #else
     if (pis == nullptr || hwpf.ReadBlock(pis, size) == 0)
-        return false;
+        return;
 #endif
-
-    return true;
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/htags.h b/hwpfilter/source/htags.h
index 303b84461c9e..292897b47bcd 100644
--- a/hwpfilter/source/htags.h
+++ b/hwpfilter/source/htags.h
@@ -46,7 +46,7 @@ struct HyperText
     char  macro[325];
     uchar type;
     char reserve[3];
-    bool Read(HWPFile& hwpf);
+    void Read(HWPFile& hwpf);
 };
 /**
  * @short Win32 OLE object
@@ -55,7 +55,7 @@ struct OlePicture
 {
     int   size;
     uint signature;
-#ifdef WIN32
+#ifdef _WIN32
     IStorage *pis;
 #else
     char *pis;
@@ -63,7 +63,7 @@ struct OlePicture
     explicit OlePicture(int tsize);
     ~OlePicture(void);
 
-    bool Read(HWPFile& hwpf);
+    void Read(HWPFile& hwpf);
 };
 #endif // INCLUDED_HWPFILTER_SOURCE_HTAGS_H
 
diff --git a/hwpfilter/source/hwpeq.cxx b/hwpfilter/source/hwpeq.cxx
index dfd2d0a88a8e..dd461103a1a6 100644
--- a/hwpfilter/source/hwpeq.cxx
+++ b/hwpfilter/source/hwpeq.cxx
@@ -33,7 +33,7 @@ using namespace std;
 #include <sal/macros.h>
 
 /* @Man: change the hwp formula to LaTeX */
-#ifdef WIN32
+#ifdef _WIN32
 # define ENDL  "\r\n"
 #else /* !WIN32 */
 # define ENDL  "\n"
@@ -48,7 +48,7 @@ using namespace std;
 #define IS_WS(ch)   (strchr(WS, ch))
 #define IS_BINARY(ch)   (strchr("+-<=>", ch))
 
-#ifdef WIN32
+#ifdef _WIN32
 #define STRICMP stricmp
 #else
 #define STRICMP strcasecmp
diff --git a/hwpfilter/source/hwpfile.cxx b/hwpfilter/source/hwpfile.cxx
index 22310ea75549..a6f97f9f1a87 100644
--- a/hwpfilter/source/hwpfile.cxx
+++ b/hwpfilter/source/hwpfile.cxx
@@ -176,15 +176,17 @@ int HWPFile::Read1b(void *ptr, size_t nmemb)
     return hiodev ? hiodev->read1b(ptr, nmemb) : 0;
 }
 
-int HWPFile::Read2b(void *ptr, size_t nmemb)
+void HWPFile::Read2b(void *ptr, size_t nmemb)
 {
-    return hiodev ? hiodev->read2b(ptr, nmemb) : 0;
+    if (hiodev)
+        hiodev->read2b(ptr, nmemb);
 }
 
 
-int HWPFile::Read4b(void *ptr, size_t nmemb)
+void HWPFile::Read4b(void *ptr, size_t nmemb)
 {
-    return hiodev ? hiodev->read4b(ptr, nmemb) : 0;
+    if (hiodev)
+        hiodev->read4b(ptr, nmemb);
 }
 
 
@@ -200,9 +202,10 @@ size_t HWPFile::SkipBlock(size_t size)
 }
 
 
-bool HWPFile::SetCompressed(bool flag)
+void HWPFile::SetCompressed(bool flag)
 {
-    return hiodev && hiodev->setCompressed(flag);
+    if (hiodev)
+        hiodev->setCompressed(flag);
 }
 
 
@@ -218,34 +221,34 @@ HIODev *HWPFile::SetIODevice(HIODev * new_hiodev)
 
 // end of HIODev wrapper
 
-bool HWPFile::InfoRead()
+void HWPFile::InfoRead()
 {
-    return _hwpInfo.Read(*this);
+    _hwpInfo.Read(*this);
 }
 
 
-bool HWPFile::FontRead()
+void HWPFile::FontRead()
 {
-    return _hwpFont.Read(*this);
+    _hwpFont.Read(*this);
 }
 
 
-bool HWPFile::StyleRead()
+void HWPFile::StyleRead()
 {
-    return _hwpStyle.Read(*this);
+    _hwpStyle.Read(*this);
 }
 
 
-bool HWPFile::ParaListRead()
+void HWPFile::ParaListRead()
 {
-    return ReadParaList(plist);
+    ReadParaList(plist);
 }
 
 bool HWPFile::ReadParaList(std::list < HWPPara* > &aplist, unsigned char flag)
 {
     std::unique_ptr<HWPPara> spNode( new HWPPara );
-     unsigned char tmp_etcflag;
-     unsigned char prev_etcflag = 0;
+    unsigned char tmp_etcflag;
+    unsigned char prev_etcflag = 0;
     while (spNode->Read(*this, flag))
     {
          if( !(spNode->etcflag & 0x04) ){
@@ -380,7 +383,7 @@ ColumnDef *HWPFile::GetColumnDef(int num)
 
     for(int i = 0; it != columnlist.end() ; ++it, i++){
         if( i == num )
-	  break;
+            break;
     }
 
     if( it != columnlist.end() )
@@ -407,8 +410,8 @@ HyperText *HWPFile::GetHyperText()
     std::list<HyperText*>::iterator it = hyperlist.begin();
 
     for( int i = 0; it != hyperlist.end(); ++it, i++ ){
-	if( i == currenthyper )
-	  break;
+        if( i == currenthyper )
+          break;
     }
 
     currenthyper++;
@@ -459,7 +462,7 @@ CharShape *HWPFile::getCharShape(int index)
 {
     if (index < 0 || static_cast<unsigned int>(index) >= cslist.size())
         return nullptr;
-    return cslist[index];
+    return cslist[index].get();
 }
 
 FBoxStyle *HWPFile::getFBoxStyle(int index)
@@ -502,22 +505,24 @@ void HWPFile::AddParaShape(ParaShape * pshape)
     int nscount = 0;
     for(int j = 0 ; j < MAXTABS-1 ; j++)
     {
-          if( j > 0 && pshape->tabs[j].position == 0 )
-                break;
-          if( pshape->tabs[0].position == 0 ){
-                if( pshape->tabs[j].type || pshape->tabs[j].dot_continue ||
-                     (pshape->tabs[j].position != 1000 *j) )
-                          nscount = j;
-          }
-          else{
-                if( pshape->tabs[j].type || pshape->tabs[j].dot_continue ||
-                     (pshape->tabs[j].position != 1000 * (j + 1)) )
-                          nscount = j;
+        if( j > 0 && pshape->tabs[j].position == 0 )
+          break;
+        if( pshape->tabs[0].position == 0 ){
+            if( pshape->tabs[j].type || pshape->tabs[j].dot_continue ||
+                 (pshape->tabs[j].position != 1000 *j) )
+                      nscount = j;
+        }
+        else {
+            if( pshape->tabs[j].type || pshape->tabs[j].dot_continue ||
+                (pshape->tabs[j].position != 1000 * (j + 1)) )
+                    nscount = j;
           }
     }
     if( nscount )
         pshape->tabs[MAXTABS-1].type = sal::static_int_cast<char>(nscount);
-     int value = compareParaShape(pshape);
+
+    int value = compareParaShape(pshape);
+
     if( value == 0 || nscount )
     {
         pshape->index = ++pcount;
@@ -527,11 +532,10 @@ void HWPFile::AddParaShape(ParaShape * pshape)
         pshape->index = value;
 }
 
-
-void HWPFile::AddCharShape(CharShape * cshape)
+void HWPFile::AddCharShape(std::shared_ptr<CharShape>& cshape)
 {
-    int value = compareCharShape(cshape);
-    if( value == 0 )
+    int value = compareCharShape(cshape.get());
+    if (value == 0)
     {
         cshape->index = ++ccount;
         cslist.push_back(cshape);
diff --git a/hwpfilter/source/hwpfile.h b/hwpfilter/source/hwpfile.h
index d8b8918176b3..e1fb3236a84d 100644
--- a/hwpfilter/source/hwpfile.h
+++ b/hwpfilter/source/hwpfile.h
@@ -143,11 +143,11 @@ class DLLEXPORT HWPFile
 /**
  * Reads nmemb short type array from HIODev
  */
-        int Read2b( void *ptr, size_t nmemb );
+        void Read2b( void *ptr, size_t nmemb );
 /**
  * Reads nmemb long type array from HIODev
  */
-        int Read4b( void *ptr, size_t nmemb );
+        void Read4b( void *ptr, size_t nmemb );
 /**
  * Reads some bytes from HIODev not regarding endian's way
  * @param size Amount for reading
@@ -164,7 +164,7 @@ class DLLEXPORT HWPFile
 /**
  * Sets if the stream is compressed
  */
-        bool SetCompressed( bool );
+        void SetCompressed( bool );
 /**
  * Sets current HIODev
  */
@@ -177,19 +177,19 @@ class DLLEXPORT HWPFile
 /**
  * Reads document information of hwp file from HIODev
  */
-        bool InfoRead(void);
+        void InfoRead(void);
 /**
  * Reads font list of hwp file from HIODev
  */
-        bool FontRead(void);
+        void FontRead(void);
 /**
  * Reads style list of hwp file from HIODev
  */
-        bool StyleRead(void);
+        void StyleRead(void);
 /**
  * Reads paragraph list of hwp file from HIODev
  */
-        bool ParaListRead();
+        void ParaListRead();
 /* 그림 등의 추가 정보를 읽는다. */
 /**
  * Reads additional information like embedded image of hwp file from HIODev
@@ -214,7 +214,7 @@ class DLLEXPORT HWPFile
         void AddColumnInfo();
         void SetColumnDef(ColumnDef *coldef);
         void AddParaShape(ParaShape *);
-        void AddCharShape(CharShape *);
+        void AddCharShape(std::shared_ptr<CharShape>&);
         void AddFBoxStyle(FBoxStyle *);
         void AddDateFormat(DateCode *);
         void AddHeaderFooter(HeaderFooter *);
@@ -285,7 +285,7 @@ class DLLEXPORT HWPFile
         std::list<HyperText*> hyperlist;
         int currenthyper;
         std::vector<ParaShape*> pslist;             /* 스타오피스의 구조상 필요 */
-        std::vector<CharShape*> cslist;
+        std::vector<std::shared_ptr<CharShape>> cslist;
         std::vector<FBoxStyle*> fbslist;
         std::vector<DateCode*> datecodes;
         std::vector<HeaderFooter*> headerfooters;
diff --git a/hwpfilter/source/hwplib.h b/hwpfilter/source/hwplib.h
index 5d1324d537cb..39d2f1b171f3 100644
--- a/hwpfilter/source/hwplib.h
+++ b/hwpfilter/source/hwplib.h
@@ -27,24 +27,16 @@
 /**
  * size of hunit is 4 since hwp96 version
  */
-#ifndef _HCHAR_
-#define _HCHAR_
 typedef unsigned short  hchar;
 typedef int     hunit;
 typedef unsigned char       kchar;
-#endif                                            // _HCHAR_
 
-#ifndef _UTYPE_
-#define _UTYPE_
 typedef unsigned char   uchar;
 typedef unsigned short  ushort;
 typedef unsigned int    uint;
-#endif                                            /* _UTYPE_ */
 
 typedef ::std::basic_string<hchar> hchar_string;
 
-#ifndef _ZZRECT_
-#define _ZZRECT_
 /**
  * @short Point
  */
@@ -86,7 +78,6 @@ typedef struct
     int   mulX, divX;
     int   mulY, divY;
 } ZZScaleXY;
-#endif                                            /* _ZZRECT_ */
 
 #if !defined(_WIN32) && !defined(MAX_PATH)
 #  define MAX_PATH  260
diff --git a/hwpfilter/source/hwpreader.cxx b/hwpfilter/source/hwpreader.cxx
index e3197d1c176f..8a8c46049020 100644
--- a/hwpfilter/source/hwpreader.cxx
+++ b/hwpfilter/source/hwpreader.cxx
@@ -157,10 +157,10 @@ sal_Bool HwpReader::filter(const Sequence< PropertyValue >& rDescriptor) throw(R
         nTotal += nRead;
     }
 
-    if( nTotal == 0 ) return sal_False;
+    if( nTotal == 0 ) return false;
 
     if (hwpfile.ReadHwpFile(stream.release()))
-          return sal_False;
+          return false;
 
     if (m_rxDocumentHandler.is())
         m_rxDocumentHandler->startDocument();
@@ -198,7 +198,7 @@ sal_Bool HwpReader::filter(const Sequence< PropertyValue >& rDescriptor) throw(R
 
     if (m_rxDocumentHandler.is())
         m_rxDocumentHandler->endDocument();
-    return sal_True;
+    return true;
 }
 
 
@@ -1718,7 +1718,7 @@ void HwpReader::makePageStyle()
              if( hwpinfo.back_info.type == 1 ){
 #ifdef _WIN32
                  padd("xlink:href", sXML_CDATA,
-                      hconv(kstr2hstr((uchar*) urltowin(hwpinfo.back_info.filename).c_str()).c_str()));
+                      reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr((uchar*) urltowin(hwpinfo.back_info.filename).c_str()).c_str())));
 #else
                  padd("xlink:href", sXML_CDATA,
                     reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr( reinterpret_cast<uchar const *>(urltounix(hwpinfo.back_info.filename).c_str())).c_str())));
@@ -2736,7 +2736,7 @@ void HwpReader::make_text_p0(HWPPara * para, bool bParaStart)
         d->bInHeader = false;
     }
     padd("text:style-name", sXML_CDATA,
-        ascii(getTStyleName(para->cshape.index, buf)));
+        ascii(getTStyleName(para->cshape->index, buf)));
     rstartEl("text:span", mxList.get());
     mxList->clear();
 
@@ -2780,8 +2780,8 @@ void HwpReader::make_text_p1(HWPPara * para,bool bParaStart)
     hchar_string str;
     int n;
     int res;
-     hchar dest[3];
-    int curr = para->cshape.index;
+    hchar dest[3];
+    int curr = para->cshape->index;
     unsigned char firstspace = 0;
 
     if( !bParaStart )
@@ -3829,9 +3829,9 @@ void HwpReader::makePicture(Picture * hbox)
                 padd("xlink:type", sXML_CDATA, "simple");
 #ifdef _WIN32
                 if( hbox->follow[4] != 0 )
-                    padd("xlink:href", sXML_CDATA, (hconv(kstr2hstr(hbox->follow + 4).c_str())));
+                    padd("xlink:href", sXML_CDATA, reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr(hbox->follow + 4).c_str())));
                 else
-                    padd("xlink:href", sXML_CDATA, (hconv(kstr2hstr(hbox->follow + 5).c_str())));
+                    padd("xlink:href", sXML_CDATA, reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr(hbox->follow + 5).c_str())));
 #else
                 if( hbox->follow[4] != 0 )
                     padd("xlink:href", sXML_CDATA,
@@ -3890,7 +3890,7 @@ void HwpReader::makePicture(Picture * hbox)
             if ( hbox->pictype == PICTYPE_FILE ){
 #ifdef _WIN32
                 sprintf(buf, "file:///%s", hbox->picinfo.picun.path );
-                padd("xlink:href", sXML_CDATA, (hconv(kstr2hstr((uchar *) buf).c_str())));
+                padd("xlink:href", sXML_CDATA, reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr((uchar *) buf).c_str())));
 #else
                 padd("xlink:href", sXML_CDATA,
                     reinterpret_cast<sal_Unicode const *>(hconv(kstr2hstr(reinterpret_cast<uchar const *>(urltounix(hbox->picinfo.picun.path).c_str())).c_str())));
@@ -3919,7 +3919,7 @@ void HwpReader::makePicture(Picture * hbox)
                      }
                      else{
                          if( hwpfile.oledata ){
-#ifdef WIN32
+#ifdef _WIN32
                              LPSTORAGE srcsto;
                              LPUNKNOWN pObj;
                              wchar_t pathname[200];
@@ -4780,9 +4780,9 @@ void HwpReader::makeOutline(Outline * hbox)
 }
 
 
-void HwpReader::parsePara(HWPPara * para, bool bParaStart)
+void HwpReader::parsePara(HWPPara * para)
 {
-
+    bool bParaStart = false;
     while (para)
     {
         if( para->nch == 1)
diff --git a/hwpfilter/source/hwpreader.hxx b/hwpfilter/source/hwpreader.hxx
index 2f47aa289179..9a4b813ed70b 100644
--- a/hwpfilter/source/hwpreader.hxx
+++ b/hwpfilter/source/hwpreader.hxx
@@ -106,7 +106,7 @@ private:
     void makeTextDecls();
 
     /* -------- Paragraph Parsing --------- */
-    void parsePara(HWPPara *para, bool bParaStart = false);
+    void parsePara(HWPPara *para);
     void make_text_p0(HWPPara *para, bool bParaStart = false);
     void make_text_p1(HWPPara *para, bool bParaStart = false);
     void make_text_p3(HWPPara *para, bool bParaStart = false);
@@ -152,7 +152,7 @@ private:
 class HwpImportFilter : public WeakImplHelper< XFilter, XImporter, XServiceInfo, XExtendedFilterDetection >
 {
 public:
-    HwpImportFilter(const Reference< XMultiServiceFactory >& rFact);
+    explicit HwpImportFilter(const Reference< XMultiServiceFactory >& rFact);
     virtual ~HwpImportFilter();
 
 public:
diff --git a/hwpfilter/source/lexer.cxx b/hwpfilter/source/lexer.cxx
index e7518ab38c25..76ba0ba770e8 100644
--- a/hwpfilter/source/lexer.cxx
+++ b/hwpfilter/source/lexer.cxx
@@ -48,7 +48,7 @@
 #ifdef __cplusplus
 
 #include <stdlib.h>
-#ifndef WIN32
+#ifndef _WIN32
 #include <unistd.h>
 #else
 #include <io.h>
@@ -960,7 +960,7 @@ char *yytext;
 #include <string.h>
 #include "nodes.h"
 
-#ifdef WIN32
+#ifdef _WIN32
 #define strdup _strdup
 #define fileno _fileno
 #define isatty _isatty
@@ -975,7 +975,7 @@ int yywrap();
 }
 #endif
 
-#ifdef WIN32
+#ifdef _WIN32
 extern YYSTYPE yylval;
 #endif
 #ifdef TOKEN_DEBUG
@@ -1023,7 +1023,7 @@ static int yy_top_state YY_PROTO(( void ));
 #ifdef YY_MALLOC_DECL
 YY_MALLOC_DECL
 #else
-#if __STDC__
+#if defined __STDC__ && __STDC__
 #ifndef __cplusplus
 #include <stdlib.h>
 #endif
@@ -1288,6 +1288,7 @@ case 23:
 YY_RULE_SETUP
 { token_debug(" ==>Ignore[\\rm]\n"); }
     //YY_BREAK
+SAL_FALLTHROUGH;
 case 24:
 YY_RULE_SETUP
 { yylval.str = yytext+1; token_debug("  ==>General_Iden[%s]\n",yytext+1); return GENERAL_IDEN; }
@@ -1332,10 +1333,12 @@ case 34:
 YY_RULE_SETUP
 { yylval.str = yytext+1; token_debug("  ==>Space_Symbol[%s]\n",yytext+1); /*return SPACE_SYMBOL;*/ }
     //YY_BREAK
+SAL_FALLTHROUGH;
 case 35:
 YY_RULE_SETUP
 { yylval.str = strdup("quad"); token_debug("    ==>Space_Symbol[quad]\n"); /* return SPACE_SYMBOL;*/ }
     //YY_BREAK
+SAL_FALLTHROUGH;
 case 36:
 YY_RULE_SETUP
 { yylval.dval = yytext;  token_debug("  ==>Digit[%s]\n",yytext); return DIGIT; }
diff --git a/hwpfilter/source/list.hxx b/hwpfilter/source/list.hxx
deleted file mode 100644
index 9cdf7df8ed4f..000000000000
--- a/hwpfilter/source/list.hxx
+++ /dev/null
@@ -1,146 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * This file incorporates work covered by the following license notice:
- *
- *   Licensed to the Apache Software Foundation (ASF) under one or more
- *   contributor license agreements. See the NOTICE file distributed
- *   with this work for additional information regarding copyright
- *   ownership. The ASF licenses this file to you under the Apache
- *   License, Version 2.0 (the "License"); you may not use this file
- *   except in compliance with the License. You may obtain a copy of
- *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
- */
-
-#ifndef INCLUDED_HWPFILTER_SOURCE_LIST_HXX
-#define INCLUDED_HWPFILTER_SOURCE_LIST_HXX
-
-/**
- * Re-implement a simple container: LinkedList + LinkedListIterator
- *
- * DO NOT USE EXCEPT FOR REPLACING THE ORIGINAL LinkedList/LinkedListIterator!
- * USE STL CONTAINERS FOR NEW CODE!
- *
- * The classes LinkedList and LinkedListIterator were originally
- * implemented in two files LinkedList.cxx/.h, whose license would not
- * allow re-distribution through OpenOffice.org. This file
- * re-implements the same functionality, based on the STL.
- */
-
-#include <cstddef>
-#include <vector>
-
-template<class T>
-class LinkedList
-{
-    typedef std::vector<T*> list_t;
-    list_t maList;
-
-public:
-    /// construct list with one element (pItem) or no element (pItem == NULL)
-    explicit LinkedList( T* pItem = 0 );
-    ~LinkedList();
-};
-
-/** iterator class for LinkedList<T>. Iterator may travel outside of
- * list using operator++/--, in which case current() must return
- * NULL. */
-template<class T>
-class LinkedListIterator
-{
-    // iterator state: reference list + position
-    LinkedList<T>* mpList;
-    int mnPosition;
-
-public:
-    /// construct list with single element
-    explicit LinkedListIterator( LinkedList<T>* pList = 0 );
-    ~LinkedListIterator();
-
-    // bug-compatible with original LinkedList.h/cxx: Ignore parameter!
-    void operator++( int );   /// advance iterator by one step (ignore n !!!)
-    void operator--( int );   /// go one step backwards (ignore n !!!)
-    void operator++();        /// advance iterator by one step
-    void operator--();        /// go one step backwards
-
-};
-
-
-// IMPLEMENTATION
-
-// (the implementation of template classes must be accessible to using
-// code, hence this implementation is in the header.)
-
-
-#include <algorithm>
-
-// define assert based on SAL, so we do not introduce a tools dependency
-#include <osl/diagnose.h>
-#define ASSERT(x) OSL_ENSURE((x), " HWP FILTER: " #x)
-
-
-template<class T>
-LinkedList<T>::LinkedList( T* pItem )
-{
-    if( pItem != nullptr )
-        maList.push_back( pItem );
-}
-
-template<class T>
-LinkedList<T>::~LinkedList()
-{
-}
-
-template<class T>
-LinkedListIterator<T>::LinkedListIterator( LinkedList<T>* pList ) :
-    mpList( pList ),
-    mnPosition( 0 )
-{
-    ASSERT( pList != nullptr );
-}
-
-template<class T>
-LinkedListIterator<T>::~LinkedListIterator()
-{
-}
-
-template<class T>
-void LinkedListIterator<T>::operator++( int )
-{
-    ASSERT( mpList != nullptr );
-
-    // bug-compatible with LinkedList.cxx: ignore parameter!
-    mnPosition ++;
-}
-
-template<class T>
-void LinkedListIterator<T>::operator--( int )
-{
-    ASSERT( mpList != nullptr );
-
-    // bug-compatible with LinkedList.cxx: ignore parameter!
-    mnPosition --;
-}
-
-template<class T>
-void LinkedListIterator<T>::operator++()
-{
-    ASSERT( mpList != nullptr );
-    mnPosition ++;
-}
-
-template<class T>
-void LinkedListIterator<T>::operator--()
-{
-    ASSERT( mpList != nullptr );
-    mnPosition --;
-}
-
-#endif
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/hwpfilter/source/mapping.h b/hwpfilter/source/mapping.h
index e8d6c0d33a7c..6a6fb8a912f4 100644
--- a/hwpfilter/source/mapping.h
+++ b/hwpfilter/source/mapping.h
@@ -363,7 +363,7 @@ const struct FormulaEntry FormulaMapTab[] = {
 #ifndef DEBUG
 hchar_string getMathMLEntity(const char *tex)
 {
-     static const size_t tabSize = sizeof(FormulaMapTab) / sizeof(FormulaMapTab[0]);
+     static const size_t tabSize = SAL_N_ELEMENTS(FormulaMapTab);
 
      hchar_string buf;
      for (size_t i = 0 ; i < tabSize ; i++) {
diff --git a/hwpfilter/source/mzstring.cxx b/hwpfilter/source/mzstring.cxx
index 9f5563a2d70a..b99b9d0044ee 100644
--- a/hwpfilter/source/mzstring.cxx
+++ b/hwpfilter/source/mzstring.cxx
@@ -23,22 +23,14 @@
 
 #include "mzstring.h"
 
-#ifndef WIN32
-#else
-
-    #if defined _MSC_VER
-        #pragma warning(push, 1)
-    #endif
+#ifdef _WIN32
 # include <windows.h>
-    #if defined _MSC_VER
-        #pragma warning(pop)
-    #endif
-#endif                                            /* WIN32 */
+#endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-#ifndef WIN32
+#ifndef _WIN32
 # define wsprintf sprintf
 #endif
 
@@ -65,7 +57,7 @@ MzString::~MzString()
 }
 
 
-MzString &MzString::operator = (MzString &s)
+MzString &MzString::operator=(const MzString &s)
 {
     int n = s.length();
     if (allocate(n))
@@ -167,29 +159,6 @@ int MzString::rfind(char ch, int pos)
 }
 
 
-// += operator
-
-MzString &MzString::operator += (char ch)
-{
-    append(&ch, 1);
-    return *this;
-}
-
-
-MzString &MzString::operator += (const char *str)
-{
-    append(str);
-    return *this;
-}
-
-
-MzString &MzString::operator += (MzString const &s)
-{
-    append(s);
-    return *this;
-}
-
-
 // << operator
 MzString &MzString::operator << (const char *str)
 {
diff --git a/hwpfilter/source/mzstring.h b/hwpfilter/source/mzstring.h
index fe84694f1f40..4afb0e54fb8a 100644
--- a/hwpfilter/source/mzstring.h
+++ b/hwpfilter/source/mzstring.h
@@ -82,7 +82,6 @@ class MzString
 {
     public:
         MzString();                               // Create an empty string
-// if len = 0, len becomes s.length)
         ~MzString();
 
         int       length() const;
@@ -94,13 +93,10 @@ class MzString
         bool      resize(int len);
 
 // Assignment
-        MzString  &operator = (MzString &s);
+        MzString  &operator = (const MzString &s);
         MzString  &operator = (const char *s);
 
 // Appending
-        MzString  &operator += (char);
-        MzString  &operator += (const char *);
-        MzString  &operator += (MzString const &);
 
         MzString  &operator << (const char *);
         MzString  &operator << (char);
diff --git a/hwpfilter/source/nodes.h b/hwpfilter/source/nodes.h
index 7d93f104b0ca..539452244d5a 100644
--- a/hwpfilter/source/nodes.h
+++ b/hwpfilter/source/nodes.h
@@ -22,7 +22,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "list.hxx"
+#include <osl/diagnose.h>
 
 enum IDLIST {
      ID_MATHML,
commit 3cdac6bb2defce45342dff04400c7a37bb8a2453
Author: Eike Rathke <erack at redhat.com>
Date:   Thu Apr 20 22:06:23 2017 +0200

    add ICU changeset-39671 fix for CVE-2017-7867 CVE-2017-7868
    
    http://bugs.icu-project.org/trac/changeset/39671
    https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=213
    https://bugzilla.redhat.com/show_bug.cgi?id=1444101
    
    Reviewed-on: https://gerrit.libreoffice.org/36754
    Reviewed-by: Eike Rathke <erack at redhat.com>
    Tested-by: Jenkins <ci at libreoffice.org>
    (cherry picked from commit c7de8233d15ed0c90fef6c49a54d60cf10119f58)
    
    Backported to older MSVC using the UGLY_SIZEOF_MAPTOUCHARS macro instead
    of sizeof(UTF8Buf::mapToUChars).
    
    Change-Id: I4e776ad4fe63c77057b0c823f8672a2b6703346f
    Reviewed-on: https://gerrit.libreoffice.org/36776
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Michael Stahl <mstahl at redhat.com>
    (cherry picked from commit 91f5d002884cae1a60768e9caa9d182f41fb7be6)

diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index 4a6a11477af3..86369ef8e43b 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -28,6 +28,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
 	$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \
 	$(if $(filter EMSCRIPTEN,$(OS)),external/icu/icu4c-emscripten.patch.1) \
 	external/icu/khmerbreakengine.patch \
+	external/icu/icu4c-changeset-39671.patch.1 \
 ))
 
 $(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
diff --git a/external/icu/icu4c-changeset-39671.patch.1 b/external/icu/icu4c-changeset-39671.patch.1
new file mode 100644
index 000000000000..b8ac1385364e
--- /dev/null
+++ b/external/icu/icu4c-changeset-39671.patch.1
@@ -0,0 +1,189 @@
+diff -ur icu.org/source/common/utext.cpp icu/source/common/utext.cpp
+--- icu.org/source/common/utext.cpp	2016-06-15 20:58:17.000000000 +0200
++++ icu/source/common/utext.cpp	2017-04-21 16:38:15.993398034 +0200
+@@ -847,9 +847,15 @@
+ //------------------------------------------------------------------------------
+ 
+ // Chunk size.
+-//     Must be less than 85, because of byte mapping from UChar indexes to native indexes.
+-//     Worst case is three native bytes to one UChar.  (Supplemenaries are 4 native bytes
+-//     to two UChars.)
++//     Must be less than 42  (256/6), because of byte mapping from UChar indexes to native indexes.
++//     Worst case there are six UTF-8 bytes per UChar.
++//         obsolete 6 byte form fd + 5 trails maps to fffd
++//         obsolete 5 byte form fc + 4 trails maps to fffd
++//         non-shortest 4 byte forms maps to fffd
++//         normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
++//     mapToUChars array size must allow for the worst case, 6.
++//     This could be brought down to 4, by treating fd and fc as pure illegal,
++//     rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
+ //
+ enum { UTF8_TEXT_CHUNK_SIZE=32 };
+ 
+@@ -867,6 +873,15 @@
+ //     pair.  Doing this is simpler than checking for the edge case.
+ //
+ 
++// erAck: older MSVC used on libreoffice-5-3 and 5-2 bails out with
++// error C2070: 'unknown': illegal sizeof operand
++// for sizeof(UTF8Buf::mapToUChars)
++// so have an ugly workaround:
++// First define a macro of the original size expression, so a follow-up patch
++// on the original code would fail..
++#define UGLY_MAPTOUCHARS_SIZE (UTF8_TEXT_CHUNK_SIZE*6+6)
++#define UGLY_SIZEOF_MAPTOUCHARS (sizeof(uint8_t)*(UGLY_MAPTOUCHARS_SIZE))
++
+ struct UTF8Buf {
+     int32_t   bufNativeStart;                        // Native index of first char in UChar buf
+     int32_t   bufNativeLimit;                        // Native index following last char in buf.
+@@ -889,7 +904,7 @@
+                                                      //  Requires two extra slots,
+                                                      //    one for a supplementary starting in the last normal position,
+                                                      //    and one for an entry for the buffer limit position.
+-    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
++    uint8_t   mapToUChars[UGLY_MAPTOUCHARS_SIZE];    // Map native offset from bufNativeStart to
+                                                      //   correspoding offset in filled part of buf.
+     int32_t   align;
+ };
+@@ -1032,6 +1047,7 @@
+             // Requested index is in this buffer.
+             u8b = (UTF8Buf *)ut->p;   // the current buffer
+             mapIndex = ix - u8b->toUCharsMapStart;
++            U_ASSERT(mapIndex < (int32_t)UGLY_SIZEOF_MAPTOUCHARS);
+             ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
+             return TRUE;
+ 
+@@ -1298,6 +1314,10 @@
+         // Can only do this if the incoming index is somewhere in the interior of the string.
+         //   If index is at the end, there is no character there to look at.
+         if (ix != ut->b) {
++            // Note: this function will only move the index back if it is on a trail byte
++            //       and there is a preceding lead byte and the sequence from the lead 
++            //       through this trail could be part of a valid UTF-8 sequence
++            //       Otherwise the index remains unchanged.
+             U8_SET_CP_START(s8, 0, ix);
+         }
+ 
+@@ -1311,7 +1331,10 @@
+         UChar   *buf = u8b->buf;
+         uint8_t *mapToNative = u8b->mapToNative;
+         uint8_t *mapToUChars = u8b->mapToUChars;
+-        int32_t  toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
++        int32_t  toUCharsMapStart = ix - UGLY_SIZEOF_MAPTOUCHARS + 1;
++        // Note that toUCharsMapStart can be negative. Happens when the remaining
++        // text from current position to the beginning is less than the buffer size.
++        // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
+         int32_t  destIx = UTF8_TEXT_CHUNK_SIZE+2;   // Start in the overflow region
+                                                     //   at end of buffer to leave room
+                                                     //   for a surrogate pair at the
+@@ -1338,6 +1361,7 @@
+             if (c<0x80) {
+                 // Special case ASCII range for speed.
+                 buf[destIx] = (UChar)c;
++                U_ASSERT(toUCharsMapStart <= srcIx);
+                 mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
+                 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
+             } else {
+@@ -1367,6 +1391,7 @@
+                 do {
+                     mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
+                 } while (sIx >= srcIx);
++                U_ASSERT(toUCharsMapStart <= (srcIx+1));
+ 
+                 // Set native indexing limit to be the current position.
+                 //   We are processing a non-ascii, non-native-indexing char now;
+@@ -1541,6 +1566,7 @@
+     U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
+     U_ASSERT(index<=ut->chunkNativeLimit);
+     int32_t mapIndex = index - u8b->toUCharsMapStart;
++    U_ASSERT(mapIndex < (int32_t)UGLY_SIZEOF_MAPTOUCHARS);
+     int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
+     U_ASSERT(offset>=0 && offset<=ut->chunkLength);
+     return offset;
+diff -ur icu.org/source/test/intltest/utxttest.cpp icu/source/test/intltest/utxttest.cpp
+--- icu.org/source/test/intltest/utxttest.cpp	2016-06-15 20:58:17.000000000 +0200
++++ icu/source/test/intltest/utxttest.cpp	2017-04-21 16:14:57.383814739 +0200
+@@ -67,6 +67,8 @@
+             if (exec) Ticket10983();  break;
+         case 7: name = "Ticket12130";
+             if (exec) Ticket12130(); break;
++        case 8: name = "Ticket12888";
++            if (exec) Ticket12888(); break;
+         default: name = "";          break;
+     }
+ }
+@@ -1583,3 +1585,63 @@
+     }
+     utext_close(&ut);
+ }
++
++// Ticket 12888: bad handling of illegal utf-8 containing many instances of the archaic, now illegal,
++//               six byte utf-8 forms. Original implementation had an assumption that
++//               there would be at most three utf-8 bytes per UTF-16 code unit.
++//               The five and six byte sequences map to a single replacement character.
++
++void UTextTest::Ticket12888() {
++    const char *badString = 
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
++            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80";
++
++    UErrorCode status = U_ZERO_ERROR;
++    LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status));
++    TEST_SUCCESS(status);
++    for (;;) {
++        UChar32 c = utext_next32(ut.getAlias());
++        if (c == U_SENTINEL) {
++            break;
++        }
++    }
++    int32_t endIdx = utext_getNativeIndex(ut.getAlias());
++    if (endIdx != (int32_t)strlen(badString)) {
++        errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, strlen(badString), endIdx);
++        return;
++    }
++
++    for (int32_t prevIndex = endIdx; prevIndex>0;) {
++        UChar32 c = utext_previous32(ut.getAlias());
++        int32_t currentIndex = utext_getNativeIndex(ut.getAlias());
++        if (c != 0xfffd) {
++            errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n",
++                    __FILE__, __LINE__, 0xfffd, c, currentIndex);
++            break;
++        }
++        if (currentIndex != prevIndex - 6) {
++            errln("%s:%d: wrong index. Expected, actual = %d, %d",
++                    __FILE__, __LINE__, prevIndex - 6, currentIndex);
++            break;
++        }
++        prevIndex = currentIndex;
++    }
++}
+diff -ur icu.org/source/test/intltest/utxttest.h icu/source/test/intltest/utxttest.h
+--- icu.org/source/test/intltest/utxttest.h	2016-06-15 20:58:17.000000000 +0200
++++ icu/source/test/intltest/utxttest.h	2017-04-21 16:14:57.383814739 +0200
+@@ -38,6 +38,7 @@
+     void Ticket10562();
+     void Ticket10983();
+     void Ticket12130();
++    void Ticket12888();
+ 
+ private:
+     struct m {                              // Map between native indices & code points.
commit 0f849dd2b9d789bc01890b9e22772dac2b5d74b3
Author: Martin Hosken <martin_hosken at sil.org>
Date:   Fri Apr 15 20:26:08 2016 +0200

    reactivate ICU Khmer patch
    
    Patch has been upstreamed with
    https://ssl.icu-project.org/trac/ticket/12504
    
    Change-Id: I1f3ddad87a2a6568ced3f9d2b2df3e0af0ee18aa
    Reviewed-on: https://gerrit.libreoffice.org/24117
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Martin Hosken <martin_hosken at sil.org>
    Reviewed-by: Eike Rathke <erack at redhat.com>
    Tested-by: Eike Rathke <erack at redhat.com>
    (cherry picked from commit 4e066825d43400969041669c82d8a4e0bfd91adf)

diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index c48d02556d2d..4a6a11477af3 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -27,6 +27,9 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
 	external/icu/clang-cl.patch.0 \
 	$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \
 	$(if $(filter EMSCRIPTEN,$(OS)),external/icu/icu4c-emscripten.patch.1) \
+	external/icu/khmerbreakengine.patch \
 ))
 
+$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
+
 # vim: set noet sw=4 ts=4:
diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
index 0687645e8790..8f81f315da3e 100644
--- a/external/icu/khmerbreakengine.patch
+++ b/external/icu/khmerbreakengine.patch
@@ -478,9 +478,9 @@ index f1c874d..3ad1b3f 100644
 -                UChar32 uc;
 -                int32_t chars = 0;
 -                for (;;) {
--                    int32_t pcIndex = utext_getNativeIndex(text);
+-                    int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
 -                    pc = utext_next32(text);
--                    int32_t pcSize = utext_getNativeIndex(text) - pcIndex;
+-                    int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
 -                    chars += pcSize;
 -                    remaining -= pcSize;
 -                    if (remaining <= 0) {
@@ -1000,10 +1000,10 @@ index cb594c6..82f2e77 100644
 +                            int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {
  
      UCharsTrie uct(characters);
-     int32_t startingTextIndex = utext_getNativeIndex(text);
+     int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
 @@ -53,7 +53,13 @@ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
          UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
-         int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
+         int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
          codePointsMatched += 1;
 +        if (ignoreSet != NULL && ignoreSet->contains(c)) {
 +            continue;
@@ -1022,11 +1022,11 @@ index cb594c6..82f2e77 100644
 -                            int32_t *prefix) const {
 +                            int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {
      BytesTrie bt(characters);
-     int32_t startingTextIndex = utext_getNativeIndex(text);
+     int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
      int32_t wordCount = 0;
 @@ -120,7 +126,13 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
          UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
-         int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
+         int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
          codePointsMatched += 1;
 +        if (ignoreSet != NULL && ignoreSet->contains(c)) {
 +            continue;
@@ -1081,7 +1081,7 @@ diff --git a/source/data/Makefile.in b/source/data/Makefile.in
 index 816c82d..c637d70 100644
 --- misc/icu/source/data/Makefile.in
 +++ build/icu/source/data/Makefile.in
-@@ -179,7 +179,7 @@ endif
+@@ -181,7 +181,7 @@ endif
  endif
  endif
  
@@ -1090,17 +1090,17 @@ index 816c82d..c637d70 100644
  ifneq ($(ENABLE_STATIC),)
  ifeq ($(PKGDATA_MODE),dll)
  	$(PKGDATA_INVOKE) $(PKGDATA) -e $(ICUDATA_ENTRY_POINT) -T $(OUTTMPDIR) -p $(ICUDATA_NAME) $(PKGDATA_LIBSTATICNAME) -m static $(PKGDATA_VERSIONING) $(PKGDATA_LIST)
-@@ -563,8 +563,14 @@ $(BRKBLDDIR)/burmesedict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES)
- 	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1000 -c -i $(BUILDDIR) $(BRKSRCDIR)/burmesedict.txt $(BRKBLDDIR)/burmesedict.dict
+@@ -564,8 +564,14 @@ $(BRKBLDDIR)/burmesedict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES)
+ 	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1000 -c -i $(BUILDDIR) $(DICTSRCDIR)/burmesedict.txt $(BRKBLDDIR)/burmesedict.dict
 
  # TODO: figure out why combining characters are here?
 -$(BRKBLDDIR)/khmerdict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES)
--	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
+-	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(DICTSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
 +#$(BRKBLDDIR)/khmerdict.dict: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(DAT_FILES)
-+#	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
++#	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(DICTSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
 +
 +#$(MAINBUILDDIR)/khmerdict.stamp: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(BRKSRCDIR)/khmerdict.txt build-local
-+# 	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
++# 	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(DICTSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
 +$(MAINBUILDDIR)/khmerdict.stamp: $(BRKSRCDIR)/khmerdict.dict build-local
 +	cp $< $(BRKBLDDIR)
 +	echo "timestamp" > $@
commit 05d9fd7b3416ef24e94973c1bdef458636ccaf7f
Author: Eike Rathke <erack at redhat.com>
Date:   Wed Apr 13 22:24:25 2016 +0200

    upgrade to ICU 57
    
    This does not apply patches
    
    external/icu/khmerbreakengine.patch
    external/icu/khmerdict.dict
    
    anymore, as the khmerbreakengine.patch failed to apply with several
    hunks, of which one was 16k. Asking the patch contributor to follow up on
    this.
    
    Change-Id: I78d4371d04a7b03417d402a222bcd384f02a619e
    Reviewed-on: https://gerrit.libreoffice.org/24067
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Eike Rathke <erack at redhat.com>
    Tested-by: Eike Rathke <erack at redhat.com>
    (cherry picked from commit c58655c5a221d986fa3c3eed2f28810269205721)

diff --git a/configure.ac b/configure.ac
index 6b003c4fc545..2ebb0ec7194d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9023,7 +9023,7 @@ SYSTEM_GENBRK=
 SYSTEM_GENCCODE=
 SYSTEM_GENCMN=
 
-ICU_MAJOR=56
+ICU_MAJOR=57
 ICU_MINOR=1
 ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE"
 ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE"
diff --git a/download.lst b/download.lst
index 53fc12297f5b..ebbba25d9080 100755
--- a/download.lst
+++ b/download.lst
@@ -63,7 +63,7 @@ export HARFBUZZ_TARBALL := harfbuzz-0.9.40.tar.bz2
 export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip
 export HUNSPELL_TARBALL := 4967da60b23413604c9e563beacc63b4-hunspell-1.3.3.tar.gz
 export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz
-export ICU_TARBALL := c4a2d71ff56aec5ebfab2a3f059be99d-icu4c-56_1-src.tgz
+export ICU_TARBALL := 976734806026a4ef8bdd17937c8898b9-icu4c-57_1-src.tgz
 export JFREEREPORT_FLOW_ENGINE_TARBALL := ba2930200c9f019c2d93a8c88c651a0f-flow-engine-0.9.4.zip
 export JFREEREPORT_FLUTE_TARBALL := d8bd5eed178db6e2b18eeed243f85aa8-flute-1.1.6.zip
 export JFREEREPORT_LIBBASE_TARBALL := eeb2c7ddf0d302fba4bfc6e97eac9624-libbase-1.1.6.zip
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index 4a6a11477af3..c48d02556d2d 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -27,9 +27,6 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
 	external/icu/clang-cl.patch.0 \
 	$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \
 	$(if $(filter EMSCRIPTEN,$(OS)),external/icu/icu4c-emscripten.patch.1) \
-	external/icu/khmerbreakengine.patch \
 ))
 
-$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
-
 # vim: set noet sw=4 ts=4:
commit aa4b3ec51803ade29323273668a516e7f18bdf95
Author: Martin Hosken <martin_hosken at sil.org>
Date:   Tue Mar 22 11:26:52 2016 +0700

    Fix wrong pattern definitions in khmer dictionary breaker
    
    Change-Id: I0132196744046391759a6e5110d054feee3deea3
    Reviewed-on: https://gerrit.libreoffice.org/23420
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Martin Hosken <martin_hosken at sil.org>
    (cherry picked from commit 7f36f4ce9f9f3d430009ba472d275d038abecb16)

diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
index bc0d287929b0..0687645e8790 100644
--- a/external/icu/khmerbreakengine.patch
+++ b/external/icu/khmerbreakengine.patch
@@ -15,8 +15,8 @@ index f1c874d..3ad1b3f 100644
 +    fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
 +
 +    // note Skip Sets contain fIgnoreSet characters too.
-+    fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]\\u200C\\u200D\\u2060"), status);
-+    fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]\\u200C\\u200D\\u2060"), status);
++    fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]\\u200C\\u200D\\u2060]"), status);
++    fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]\\u200C\\u200D\\u2060]"), status);
 +    fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
  }
  
@@ -332,10 +332,10 @@ index f1c874d..3ad1b3f 100644
 +        startZwsp = scanBeforeStart(text, scanStart, breakStart);
 +    }
 +    utext_setNativeIndex(text, rangeStart);
-+    scanFwdClusters(text, rangeStart, initAfter);
++    scanFwdClusters(text, rangeEnd, initAfter);
 +    bool endZwsp = scanAfterEnd(text, utext_nativeLength(text), scanEnd, breakEnd);
 +    utext_setNativeIndex(text, rangeEnd - 1);
-+    scanBackClusters(text, rangeEnd, finalBefore);
++    scanBackClusters(text, rangeStart, finalBefore);
 +    if (finalBefore < initAfter) {   // the whole run is tented so no breaks
 +        if (breakStart || fTypes < UBRK_LINE)
 +            foundBreaks.push(rangeStart, status);
@@ -539,7 +539,7 @@ index f1c874d..3ad1b3f 100644
 +            int32_t ln = lengths.elementAti(j);
 +            utext_setNativeIndex(text, ln+ix);
 +            int32_t c = utext_current32(text);
-+            while (fPuncSet.contains(c) || fIgnoreSet.contains(c)) {
++            while ((fPuncSet.contains(c) || fIgnoreSet.contains(c)) && ln + i < numCodePts) {
 +                ++ln;
 +                utext_next32(text);
 +                c = utext_current32(text);
commit 55dece94611e1b2a8a1974d11c10050d8d74b5f7
Author: Martin Hosken <martin_hosken at sil.org>
Date:   Thu Mar 17 09:57:35 2016 +0700

    Fix bug in Khmer line breaking and update dictionary
    
    Change-Id: I2b776925c2c95cb56ccd592d036823c26054e059
    Reviewed-on: https://gerrit.libreoffice.org/23316
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Martin Hosken <martin_hosken at sil.org>
    (cherry picked from commit a976a19ca82661d8b459b85f5514b0e4c9222d47)

diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
index ba3e392a27f3..bc0d287929b0 100644
--- a/external/icu/khmerbreakengine.patch
+++ b/external/icu/khmerbreakengine.patch
@@ -2,7 +2,7 @@ diff --git a/source/common/dictbe.cpp b/source/common/dictbe.cpp
 index f1c874d..3ad1b3f 100644
 --- misc/icu/source/common/dictbe.cpp
 +++ build/icu/source/common/dictbe.cpp
-@@ -27,8 +27,16 @@ U_NAMESPACE_BEGIN
+@@ -27,8 +27,17 @@ U_NAMESPACE_BEGIN
   ******************************************************************
   */
  
@@ -14,13 +14,14 @@ index f1c874d..3ad1b3f 100644
      fTypes = breakTypes;
 +    fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
 +
++    // note Skip Sets contain fIgnoreSet characters too.
 +    fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]\\u200C\\u200D\\u2060"), status);
 +    fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]\\u200C\\u200D\\u2060"), status);
 +    fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
  }
  
  DictionaryBreakEngine::~DictionaryBreakEngine() {
-@@ -90,7 +98,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
+@@ -90,7 +99,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
          result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
          utext_setNativeIndex(text, current);
      }
@@ -29,7 +30,7 @@ index f1c874d..3ad1b3f 100644
      return result;
  }
  
-@@ -101,6 +109,163 @@ DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
+@@ -101,6 +110,169 @@ DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
      fSet.compact();
  }
  
@@ -87,6 +88,8 @@ index f1c874d..3ad1b3f 100644
 +    }
 +    for (int i = 0; i < clusterLimit; ++i) { // scan backwards clusterLimit clusters
 +        while (start > textStart) {
++            while (fIgnoreSet.contains(c))
++                c = utext_previous32(text);
 +            if (!fMarkSet.contains(c)) {
 +                if (fBaseSet.contains(c)) {
 +                    c = utext_previous32(text);
@@ -125,6 +128,10 @@ index f1c874d..3ad1b3f 100644
 +        ++end;
 +    }
 +    for (int i = 0; i < clusterLimit; ++i) { // scan forwards clusterLimit clusters
++        while (fIgnoreSet.contains(c)) {
++            utext_next32(text);
++            c = utext_current32(text);
++        }
 +        if (fBaseSet.contains(c)) {
 +            while (end < textEnd) {
 +                utext_next32(text);
@@ -193,7 +200,7 @@ index f1c874d..3ad1b3f 100644
  /*
   ******************************************************************
   * PossibleWord
-@@ -128,35 +293,35 @@ private:
+@@ -128,35 +302,35 @@ private:
  public:
      PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {};
      ~PossibleWord() {};
@@ -238,242 +245,7 @@ index f1c874d..3ad1b3f 100644
          // Dictionary leaves text after longest prefix, not longest word. Back up.
          if (count <= 0) {
              utext_setNativeIndex(text, start);
-@@ -261,16 +426,16 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
-     int32_t current;
-     UErrorCode status = U_ZERO_ERROR;
-     PossibleWord words[THAI_LOOKAHEAD];
--    
-+
-     utext_setNativeIndex(text, rangeStart);
--    
-+
-     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
-         cpWordLength = 0;
-         cuWordLength = 0;
- 
-         // Look for candidate words at the current position
-         int32_t candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
--        
-+
-         // If we found exactly one, use that
-         if (candidates == 1) {
-             cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
-@@ -291,12 +456,12 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
-                         words[wordsFound%THAI_LOOKAHEAD].markCurrent();
-                         wordsMatched = 2;
-                     }
--                    
-+
-                     // If we're already at the end of the range, we're done
-                     if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
-                         goto foundBest;
-                     }
--                    
-+
-                     // See if any of the possible second words is followed by a third word
-                     do {
-                         // If we find a third word, stop right away
-@@ -315,13 +480,13 @@ foundBest:
-             cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength();
-             wordsFound += 1;
-         }
--        
-+
-         // We come here after having either found a word or not. We look ahead to the
-         // next word. If it's not a dictionary word, we will combine it with the word we
-         // just found (if there is one), but only if the preceding word does not exceed
-         // the threshold.
-         // The text iterator should now be positioned at the end of the word we found.
--        
-+
-         UChar32 uc = 0;
-         if ((int32_t)utext_getNativeIndex(text) < rangeEnd &&  cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
-             // if it is a dictionary word, do nothing. If it isn't, then if there is
-@@ -357,12 +522,12 @@ foundBest:
-                         }
-                     }
-                 }
--                
-+
-                 // Bump the word count if there wasn't already one
-                 if (cuWordLength <= 0) {
-                     wordsFound += 1;
-                 }
--                
-+
-                 // Update the length with the passed-over characters
-                 cuWordLength += chars;
-             }
-@@ -371,14 +536,14 @@ foundBest:
-                 utext_setNativeIndex(text, current+cuWordLength);
-             }
-         }
--        
-+
-         // Never stop before a combining mark.
-         int32_t currPos;
-         while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
-             utext_next32(text);
-             cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
-         }
--        
-+
-         // Look ahead for possible suffixes if a dictionary word does not follow.
-         // We do this in code rather than using a rule so that the heuristic
-         // resynch continues to function. For example, one of the suffix characters
-@@ -496,16 +661,16 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
-     int32_t current;
-     UErrorCode status = U_ZERO_ERROR;
-     PossibleWord words[LAO_LOOKAHEAD];
--    
-+
-     utext_setNativeIndex(text, rangeStart);
--    
-+
-     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
-         cuWordLength = 0;
-         cpWordLength = 0;
- 
-         // Look for candidate words at the current position
-         int32_t candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
--        
-+
-         // If we found exactly one, use that
-         if (candidates == 1) {
-             cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text);
-@@ -526,12 +691,12 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
-                         words[wordsFound%LAO_LOOKAHEAD].markCurrent();
-                         wordsMatched = 2;
-                     }
--                    
-+
-                     // If we're already at the end of the range, we're done
-                     if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
-                         goto foundBest;
-                     }
--                    
-+
-                     // See if any of the possible second words is followed by a third word
-                     do {
-                         // If we find a third word, stop right away
-@@ -549,7 +714,7 @@ foundBest:
-             cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength();
-             wordsFound += 1;
-         }
--        
-+
-         // We come here after having either found a word or not. We look ahead to the
-         // next word. If it's not a dictionary word, we will combine it withe the word we
-         // just found (if there is one), but only if the preceding word does not exceed
-@@ -587,12 +752,12 @@ foundBest:
-                         }
-                     }
-                 }
--                
-+
-                 // Bump the word count if there wasn't already one
-                 if (cuWordLength <= 0) {
-                     wordsFound += 1;
-                 }
--                
-+
-                 // Update the length with the passed-over characters
-                 cuWordLength += chars;
-             }
-@@ -601,14 +766,14 @@ foundBest:
-                 utext_setNativeIndex(text, current + cuWordLength);
-             }
-         }
--        
-+
-         // Never stop before a combining mark.
-         int32_t currPos;
-         while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
-             utext_next32(text);
-             cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
-         }
--        
-+
-         // Look ahead for possible suffixes if a dictionary word does not follow.
-         // We do this in code rather than using a rule so that the heuristic
-         // resynch continues to function. For example, one of the suffix characters
-@@ -689,16 +854,16 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
-     int32_t current;
-     UErrorCode status = U_ZERO_ERROR;
-     PossibleWord words[BURMESE_LOOKAHEAD];
--    
-+
-     utext_setNativeIndex(text, rangeStart);
--    
-+
-     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
-         cuWordLength = 0;
-         cpWordLength = 0;
- 
-         // Look for candidate words at the current position
-         int32_t candidates = words[wordsFound%BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
--        
-+
-         // If we found exactly one, use that
-         if (candidates == 1) {
-             cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text);
-@@ -719,12 +884,12 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
-                         words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
-                         wordsMatched = 2;
-                     }
--                    
-+
-                     // If we're already at the end of the range, we're done
-                     if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
-                         goto foundBest;
-                     }
--                    
-+
-                     // See if any of the possible second words is followed by a third word
-                     do {
-                         // If we find a third word, stop right away
-@@ -742,7 +907,7 @@ foundBest:
-             cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength();
-             wordsFound += 1;
-         }
--        
-+
-         // We come here after having either found a word or not. We look ahead to the
-         // next word. If it's not a dictionary word, we will combine it withe the word we
-         // just found (if there is one), but only if the preceding word does not exceed
-@@ -780,12 +945,12 @@ foundBest:
-                         }
-                     }
-                 }
--                
-+
-                 // Bump the word count if there wasn't already one
-                 if (cuWordLength <= 0) {
-                     wordsFound += 1;
-                 }
--                
-+
-                 // Update the length with the passed-over characters
-                 cuWordLength += chars;
-             }
-@@ -794,14 +959,14 @@ foundBest:
-                 utext_setNativeIndex(text, current + cuWordLength);
-             }
-         }
--        
-+
-         // Never stop before a combining mark.
-         int32_t currPos;
-         while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
-             utext_next32(text);
-             cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
-         }
--        
-+
-         // Look ahead for possible suffixes if a dictionary word does not follow.
-         // We do this in code rather than using a rule so that the heuristic
-         // resynch continues to function. For example, one of the suffix characters
-@@ -828,51 +993,28 @@ foundBest:
+@@ -828,51 +1002,28 @@ foundBest:
   * KhmerBreakEngine
   */
  
@@ -536,7 +308,7 @@ index f1c874d..3ad1b3f 100644
  }
  
  KhmerBreakEngine::~KhmerBreakEngine() {
-@@ -884,180 +1027,204 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
+@@ -884,180 +1036,204 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
                                                  int32_t rangeStart,
                                                  int32_t rangeEnd,
                                                  UStack &foundBreaks ) const {
@@ -560,10 +332,10 @@ index f1c874d..3ad1b3f 100644
 +        startZwsp = scanBeforeStart(text, scanStart, breakStart);
 +    }
 +    utext_setNativeIndex(text, rangeStart);
-+    scanFwdClusters(text, rangeEnd, initAfter);
++    scanFwdClusters(text, rangeStart, initAfter);
 +    bool endZwsp = scanAfterEnd(text, utext_nativeLength(text), scanEnd, breakEnd);
 +    utext_setNativeIndex(text, rangeEnd - 1);
-+    scanBackClusters(text, rangeStart, finalBefore);
++    scanBackClusters(text, rangeEnd, finalBefore);
 +    if (finalBefore < initAfter) {   // the whole run is tented so no breaks
 +        if (breakStart || fTypes < UBRK_LINE)
 +            foundBreaks.push(rangeStart, status);
@@ -715,7 +487,7 @@ index f1c874d..3ad1b3f 100644
 +        if (count == 0) {
 +            utext_setNativeIndex(text, ix);
 +            int32_t c = utext_current32(text);
-+            if (fPuncSet.contains(c) || c == ZWSP || c == WJ) {
++            if (fPuncSet.contains(c) || fIgnoreSet.contains(c) || c == ZWSP) {
 +                values.setElementAt(0, count);
 +                lengths.setElementAt(1, count++);
 +            } else if (fBaseSet.contains(c)) {
@@ -767,7 +539,7 @@ index f1c874d..3ad1b3f 100644
 +            int32_t ln = lengths.elementAti(j);
 +            utext_setNativeIndex(text, ln+ix);
 +            int32_t c = utext_current32(text);
-+            while (fPuncSet.contains(c)) {
++            while (fPuncSet.contains(c) || fIgnoreSet.contains(c)) {
 +                ++ln;
 +                utext_next32(text);
 +                c = utext_current32(text);
@@ -887,71 +659,6 @@ index f1c874d..3ad1b3f 100644
  }
  
  #if !UCONFIG_NO_NORMALIZATION
-@@ -1121,7 +1288,7 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
-     return (int32_t)1 << bitIndex;
- }
- 
--       
-+
- /*
-  * @param text A UText representing the text
-  * @param rangeStart The start of the range of dictionary characters
-@@ -1129,7 +1296,7 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
-  * @param foundBreaks Output of C array of int32_t break positions, or 0
-  * @return The number of breaks found
-  */
--int32_t 
-+int32_t
- CjkBreakEngine::divideUpDictionaryRange( UText *inText,
-         int32_t rangeStart,
-         int32_t rangeEnd,
-@@ -1192,7 +1359,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
-         if (U_FAILURE(status)) {
-             return 0;
-         }
--        
-+
-         UnicodeString fragment;
-         UnicodeString normalizedFragment;
-         for (int32_t srcI = 0; srcI < inString.length();) {  // Once per normalization chunk
-@@ -1261,7 +1428,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
-             }
-         }
-     }
--                
-+
-     // bestSnlp[i] is the snlp of the best segmentation of the first i
-     // code points in the range to be matched.
-     UVector32 bestSnlp(numCodePts + 1, status);
-@@ -1271,7 +1438,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
-     }
- 
- 
--    // prev[i] is the index of the last CJK code point in the previous word in 
-+    // prev[i] is the index of the last CJK code point in the previous word in
-     // the best segmentation of the first i characters.
-     UVector32 prev(numCodePts + 1, status);
-     for(int32_t i = 0; i <= numCodePts; i++){
-@@ -1305,8 +1472,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
-                              // Note: lengths is filled with code point lengths
-                              //       The NULL parameter is the ignored code unit lengths.
- 
--        // if there are no single character matches found in the dictionary 
--        // starting with this charcter, treat character as a 1-character word 
-+        // if there are no single character matches found in the dictionary
-+        // starting with this charcter, treat character as a 1-character word
-         // with the highest value possible, i.e. the least likely to occur.
-         // Exclude Korean characters from this treatment, as they should be left
-         // together by default.
-@@ -1380,7 +1547,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
-         numBreaks++;
-     }
- 
--    // Now that we're done, convert positions in t_boundary[] (indices in 
-+    // Now that we're done, convert positions in t_boundary[] (indices in
-     // the normalized input string) back to indices in the original input UText
-     // while reversing t_boundary and pushing values to foundBreaks.
-     for (int32_t i = numBreaks-1; i >= 0; i--) {
 diff --git a/source/common/dictbe.h b/source/common/dictbe.h
 index d3488cd..26caa75 100644
 --- misc/icu/source/common/dictbe.h
diff --git a/external/icu/khmerdict.dict b/external/icu/khmerdict.dict
index c935cd088659..52605b65469d 100644
Binary files a/external/icu/khmerdict.dict and b/external/icu/khmerdict.dict differ
commit 15b4bad58196d19239d1dff615fa61fe7f15a07f
Author: Martin Hosken <martin_hosken at sil.org>
Date:   Fri Jan 8 16:41:52 2016 +0700

    Fix applying external dict to icu, and khmer break engine fixes
    
    Change-Id: Ib897e5fa5e80f75f501694dbf874aabd92253b25
    Reviewed-on: https://gerrit.libreoffice.org/21247
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Martin Hosken <martin_hosken at sil.org>
    (cherry picked from commit 39b718dd655220110523b7013e65ea4f821aedf7)

diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
index 03e6079b19f0..ba3e392a27f3 100644
--- a/external/icu/khmerbreakengine.patch
+++ b/external/icu/khmerbreakengine.patch
@@ -14,8 +14,8 @@ index f1c874d..3ad1b3f 100644
      fTypes = breakTypes;
 +    fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
 +
-+    fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]"), status);
-+    fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]"), status);
++    fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]\\u200C\\u200D\\u2060"), status);
++    fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]\\u200C\\u200D\\u2060"), status);
 +    fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
  }
  
@@ -473,7 +473,7 @@ index f1c874d..3ad1b3f 100644
          // Look ahead for possible suffixes if a dictionary word does not follow.
          // We do this in code rather than using a rule so that the heuristic
          // resynch continues to function. For example, one of the suffix characters
-@@ -828,51 +993,29 @@ foundBest:
+@@ -828,51 +993,28 @@ foundBest:
   * KhmerBreakEngine
   */
  
@@ -506,7 +506,7 @@ index f1c874d..3ad1b3f 100644
          setCharacters(fKhmerWordSet);
      }
      fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
-     fMarkSet.add(0x0020);
+-    fMarkSet.add(0x0020);
 -    fEndWordSet = fKhmerWordSet;
 -    fBeginWordSet.add(0x1780, 0x17B3);
 -    //fBeginWordSet.add(0x17A3, 0x17A4);      // deprecated vowels
@@ -522,7 +522,7 @@ index f1c874d..3ad1b3f 100644
 -//    fSuffixSet.add(THAI_MAIYAMOK);
 +    fIgnoreSet.add(0x2060);         // WJ
 +    fIgnoreSet.add(0x200C, 0x200D); // ZWJ, ZWNJ
-+    fBaseSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:^M:]]"), status);
++    fBaseSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:lb=SA:]&[:^M:]]"), status);
 +    fPuncSet.applyPattern(UNICODE_STRING_SIMPLE("[\\u17D4\\u17D5\\u17D6\\u17D7\\u17D9:]"), status);
  
      // Compact for caching.
@@ -750,7 +750,7 @@ index f1c874d..3ad1b3f 100644
 -                if (cuWordLength <= 0) {
 -                    wordsFound += 1;
 -                }
-+                } while (fMarkSet.contains(c));
++                } while (fMarkSet.contains(c) || fIgnoreSet.contains(c));
 +                values.setElementAt(BADSNLP, count);
 +                lengths.setElementAt(utext_getNativeIndex(text) - currix, count++);
 +            } else {
@@ -775,7 +775,7 @@ index f1c874d..3ad1b3f 100644
 -            else {
 -                // Back up to where we were for next iteration
 -                utext_setNativeIndex(text, current+cuWordLength);
-+            int32_t ln_j_i = ln + i;
++            int32_t ln_j_i = ln + i;   // yes really i!
 +            if (newSnlp < bestSnlp.elementAti(ln_j_i)) {
 +                if (v == BADSNLP) {
 +                    int32_t p = prev.elementAti(i);
@@ -1395,7 +1395,7 @@ index 816c82d..c637d70 100644
 +#$(MAINBUILDDIR)/khmerdict.stamp: $(TOOLBINDIR)/gendict$(TOOLEXEEXT) $(BRKSRCDIR)/khmerdict.txt build-local
 +# 	$(INVOKE) $(TOOLBINDIR)/gendict --bytes --transform offset-0x1780 -c -i $(BUILDDIR) $(BRKSRCDIR)/khmerdict.txt $(BRKBLDDIR)/khmerdict.dict
 +$(MAINBUILDDIR)/khmerdict.stamp: $(BRKSRCDIR)/khmerdict.dict build-local
-+	cp $< $(MAINBUILDDIR)
++	cp $< $(BRKBLDDIR)
 +	echo "timestamp" > $@
 
  ####################################################    CFU
commit 85d5174a862c78561c4cf85aa7c6ef2ba99d5352
Author: Martin Hosken <martin_hosken at sil.org>
Date:   Sat Dec 12 11:36:53 2015 +0700

    Use .dict files since below the 500K limit
    
    Change-Id: Iec71ad4918cd333f0a44d372017ecee300e3aca9
    Reviewed-on: https://gerrit.libreoffice.org/20748
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Martin Hosken <martin_hosken at sil.org>
    (cherry picked from commit fbb00383d82da5ce375f1b034d3fb9ebdd9a8f0e)

diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index c48d02556d2d..4a6a11477af3 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -27,6 +27,9 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
 	external/icu/clang-cl.patch.0 \
 	$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \
 	$(if $(filter EMSCRIPTEN,$(OS)),external/icu/icu4c-emscripten.patch.1) \
+	external/icu/khmerbreakengine.patch \
 ))
 
+$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
+
 # vim: set noet sw=4 ts=4:
diff --git a/external/icu/khmerbreakengine.patch b/external/icu/khmerbreakengine.patch
new file mode 100644
index 000000000000..03e6079b19f0
--- /dev/null
+++ b/external/icu/khmerbreakengine.patch
@@ -0,0 +1,1403 @@
+diff --git a/source/common/dictbe.cpp b/source/common/dictbe.cpp
+index f1c874d..3ad1b3f 100644
+--- misc/icu/source/common/dictbe.cpp
++++ build/icu/source/common/dictbe.cpp
+@@ -27,8 +27,16 @@ U_NAMESPACE_BEGIN
+  ******************************************************************
+  */
+ 
+-DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) {
++DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) :
++    clusterLimit(3)
++{
++    UErrorCode status = U_ZERO_ERROR;
+     fTypes = breakTypes;
++    fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
++
++    fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]]"), status);
++    fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]]"), status);
++    fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
+ }
+ 
+ DictionaryBreakEngine::~DictionaryBreakEngine() {
+@@ -90,7 +98,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
+         result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
+         utext_setNativeIndex(text, current);
+     }
+-    
++
+     return result;
+ }
+ 
+@@ -101,6 +109,163 @@ DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
+     fSet.compact();
+ }
+ 
++bool
++DictionaryBreakEngine::scanBeforeStart(UText *text, int32_t& start, bool &doBreak) const {
++    UErrorCode status = U_ZERO_ERROR;
++    UText* ut = utext_clone(NULL, text, false, true, &status);
++    utext_setNativeIndex(ut, start);
++    UChar32 c = utext_current32(ut);
++    bool res = false;
++    doBreak = true;
++    while (start >= 0) {
++        if (!fSkipStartSet.contains(c)) {
++            res = (c == ZWSP);
++            break;
++        }
++        --start;
++        c = utext_previous32(ut);
++        doBreak = false;
++    }
++    utext_close(ut);
++    return res;
++}
++
++bool
++DictionaryBreakEngine::scanAfterEnd(UText *text, int32_t textEnd, int32_t& end, bool &doBreak) const {
++    UErrorCode status = U_ZERO_ERROR;
++    UText* ut = utext_clone(NULL, text, false, true, &status);
++    utext_setNativeIndex(ut, end);
++    UChar32 c = utext_current32(ut);
++    bool res = false;
++    doBreak = !fNBeforeSet.contains(c);
++    while (end < textEnd) {
++        if (!fSkipEndSet.contains(c)) {
++            res = (c == ZWSP);
++            break;
++        }
++        ++end;

... etc. - the rest is truncated


More information about the Libreoffice-commits mailing list