[Libreoffice-commits] .: sal/Library_sal_textenc.mk sal/qa sal/textenc

Libreoffice Gerrit user logerrit at kemper.freedesktop.org
Tue Nov 27 05:31:53 PST 2012


 sal/Library_sal_textenc.mk            |    1 
 sal/qa/rtl/textenc/rtl_tencinfo.cxx   |    2 
 sal/qa/rtl/textenc/rtl_textcvt.cxx    |   81 ++---
 sal/textenc/convertisciidevangari.cxx |  496 ++++++++++++++++++++++++++++++++++
 sal/textenc/convertisciidevangari.hxx |   34 ++
 sal/textenc/convertisciidevangari.tab |   35 ++
 sal/textenc/tables.cxx                |    5 
 7 files changed, 611 insertions(+), 43 deletions(-)

New commits:
commit bfeeed3e986f3ce5090d5a03c50546cbda3b99e6
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Mon Nov 26 16:22:49 2012 +0000

    implement a new ISCII (Devanagari) <-> Unicode converter
    
    this time with support for the multi-byte encodings possible
    in ISCII
    
    Change-Id: I1dc09e8836676ab614b531e8dc10f91a90b7c4fd

diff --git a/sal/Library_sal_textenc.mk b/sal/Library_sal_textenc.mk
index 5d5d40e..972f0ee 100644
--- a/sal/Library_sal_textenc.mk
+++ b/sal/Library_sal_textenc.mk
@@ -41,6 +41,7 @@ $(eval $(call gb_Library_add_exception_objects,sal_textenc,\
 	sal/textenc/convertbig5hkscs \
 	sal/textenc/converteuctw \
 	sal/textenc/convertgb18030 \
+	sal/textenc/convertisciidevangari \
 	sal/textenc/convertiso2022cn \
 	sal/textenc/convertiso2022jp \
 	sal/textenc/convertiso2022kr \
diff --git a/sal/qa/rtl/textenc/rtl_tencinfo.cxx b/sal/qa/rtl/textenc/rtl_tencinfo.cxx
index 7d3ade9..70e2f2e 100644
--- a/sal/qa/rtl/textenc/rtl_tencinfo.cxx
+++ b/sal/qa/rtl/textenc/rtl_tencinfo.cxx
@@ -509,8 +509,8 @@ namespace
         CPPUNIT_TEST( MimeCharsetFromTextEncoding_BIG5_HKSCS );
         CPPUNIT_TEST( MimeCharsetFromTextEncoding_TIS_620 );
         CPPUNIT_TEST( MimeCharsetFromTextEncoding_KOI8_U );
-#if 0
         CPPUNIT_TEST( MimeCharsetFromTextEncoding_ISCII_DEVANAGARI );
+#if 0
         CPPUNIT_TEST( MimeCharsetFromTextEncoding_JAVA_UTF8 );
 #endif
 
diff --git a/sal/qa/rtl/textenc/rtl_textcvt.cxx b/sal/qa/rtl/textenc/rtl_textcvt.cxx
index 3c8c33e..ba7074f 100644
--- a/sal/qa/rtl/textenc/rtl_textcvt.cxx
+++ b/sal/qa/rtl/textenc/rtl_textcvt.cxx
@@ -134,12 +134,13 @@ void testSingleByteCharSet(SingleByteCharSet const & rSet) {
                  | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
                  | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
                 &nInfo, &nConverted);
+
+            sal_uInt32 nExpectedInfo = (RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_UNDEFINED);
+
             CPPUNIT_ASSERT_MESSAGE(
                 "failure #9",
                 (nSize == 0
-                 && (nInfo
-                     == (RTL_TEXTTOUNICODE_INFO_ERROR
-                         | RTL_TEXTTOUNICODE_INFO_UNDEFINED))
+                 && (nInfo == nExpectedInfo)
                  && nConverted == 0));
             rtl_destroyTextToUnicodeContext(aConverter, aContext);
             rtl_destroyTextToUnicodeConverter(aConverter);
@@ -1098,41 +1099,6 @@ void Test::testSingleByte() {
                 0x0425,0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,
                 0x041F,0x042F,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
                 0x042C,0x042B,0x0417,0x0428,0x042D,0x0429,0x0427,0x042A } },
-#if 0
-            { RTL_TEXTENCODING_ISCII_DEVANAGARI,
-              { 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
-                0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
-                0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
-                0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
-                0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
-                0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
-                0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
-                0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
-                0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
-                0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
-                0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
-                0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
-                0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
-                0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
-                0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
-                0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
-                0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
-                0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
-                0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
-                0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
-                0xFFFF,0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,
-                0x0909,0x090A,0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,
-                0x0913,0x0914,0x0911,0x0915,0x0916,0x0917,0x0918,0x0919,
-                0x091A,0x091B,0x091C,0x091D,0x091E,0x091F,0x0920,0x0921,
-                0x0922,0x0923,0x0924,0x0925,0x0926,0x0927,0x0928,0x0929,
-                0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,0x095F,0x0930,
-                0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,0x0938,
-                0x0939,0xFFFF,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
-                0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,
-                0x094D,0x093C,0x0964,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
-                0xFFFF,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
-                0x096D,0x096E,0x096F,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF } },
-#endif
             { RTL_TEXTENCODING_ADOBE_STANDARD,
               { 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
                 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
@@ -2498,6 +2464,43 @@ void Test::testComplex() {
               false,
               true,
               false,
+              RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR },
+            { RTL_TEXTENCODING_ISCII_DEVANAGARI,
+              RTL_CONSTASCII_STRINGPARAM(
+                  "\xD7\xE6\x20\xD4\xCF\xE8\xD6\x20"
+                  "\xC8\xD8\xD1\xE1\x20\xB3\xCA\xDC"
+                  "\xCF\xC4\xDA\xD7\x20\xD8\xDB\xA2"
+                  "\xC4\xDE\x20\xB1\xCF\x20\xCC\xDD"
+                  "\xD7\xD1\xCC\xDA\xC6\x20\xC4\xE5"
+                  "\xC6\xE5\xA2\x20\xB3\xE1\x20\xB3"
+                  "\xBD\xE8\xBD\xCF\xC8\xC6\x20\xB3"
+                  "\xE5\x20\xC9\xBD\xB3\xDA\xCF\x20"
+                  "\xB8\xDD\xB3\xE1\x20\xC3\xE1\x20"
+                  "\xEA"),
+              { 0x0938, 0x094C, 0x0020, 0x0935, 0x0930, 0x094D, 0x0937, 0x0020,
+                0x092A, 0x0939, 0x0932, 0x0947, 0x0020, 0x0915, 0x092C, 0x0940,
+                0x0930, 0x0926, 0x093E, 0x0938, 0x0020, 0x0939, 0x093F, 0x0902,
+                0x0926, 0x0942, 0x0020, 0x0914, 0x0930, 0x0020, 0x092E, 0x0941,
+                0x0938, 0x0932, 0x092E, 0x093E, 0x0928, 0x0020, 0x0926, 0x094B,
+                0x0928, 0x094B, 0x0902, 0x0020, 0x0915, 0x0947, 0x0020, 0x0915,
+                0x091F, 0x094D, 0x091F, 0x0930, 0x092A, 0x0928, 0x0020, 0x0915,
+                0x094B, 0x0020, 0x092B, 0x091F, 0x0915, 0x093E, 0x0930, 0x0020,
+                0x091A, 0x0941, 0x0915, 0x0947, 0x0020, 0x0925, 0x0947, 0x0020,
+                0x0964 },
+              73,
+              false,
+              true,
+              true,
+              false,
+              RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR },
+            { RTL_TEXTENCODING_ISCII_DEVANAGARI,
+              RTL_CONSTASCII_STRINGPARAM("\xE8\xE8\xE8\xE9\xA1\xE9\xEA\xE9"),
+              { 0x094D, 0x200C, 0x094D, 0x200D, 0x0950, 0x93D },
+              6,
+              false,
+              true,
+              true,
+              false,
               RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR }
         };
     for (std::size_t i = 0; i < SAL_N_ELEMENTS(data); ++i) {
@@ -2870,10 +2873,8 @@ void Test::testInfo() {
         { RTL_TEXTENCODING_IBM_861, RTL_TEXTENCODING_INFO_MIME, true },
         { RTL_TEXTENCODING_IBM_863, RTL_TEXTENCODING_INFO_MIME, true },
         { RTL_TEXTENCODING_IBM_865, RTL_TEXTENCODING_INFO_MIME, true },
-#if 0
         { RTL_TEXTENCODING_ISCII_DEVANAGARI, RTL_TEXTENCODING_INFO_ASCII, true },
         { RTL_TEXTENCODING_ISCII_DEVANAGARI, RTL_TEXTENCODING_INFO_MIME, false },
-#endif
         { RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_ASCII, false },
         { RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_MIME, true },
         { RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_SYMBOL, false },
diff --git a/sal/textenc/convertisciidevangari.cxx b/sal/textenc/convertisciidevangari.cxx
new file mode 100644
index 0000000..8adeb4b
--- /dev/null
+++ b/sal/textenc/convertisciidevangari.cxx
@@ -0,0 +1,496 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "converter.hxx"
+#include "convertisciidevangari.hxx"
+#include "convertsinglebytetobmpunicode.hxx"
+#include <rtl/textcvt.h>
+
+using namespace sal::detail::textenc;
+using namespace rtl::textenc;
+
+// Per-conversion state for decoding ISCII Devanagari bytes into Unicode.
+struct IsciiDevanagariToUnicode
+{
+    // Most recent source byte that convert() decoded through the plain lookup
+    // table, or 0 otherwise; convert() checks it to recognise a byte that
+    // directly follows a halant (0xE8).
+    sal_uInt8 m_cPrevChar;
+
+    IsciiDevanagariToUnicode() { reset(); }
+
+    // Forgets the remembered byte so the next convert() starts from scratch.
+    void reset() { m_cPrevChar = 0; }
+
+    // Defined out of line further down in this file.
+    sal_Size convert(char const* pSrcBuf, sal_Size nSrcBytes,
+        sal_Unicode* pDestBuf, sal_Size nDestChars, sal_uInt32 nFlags,
+        sal_uInt32* pInfo, sal_Size* pSrcCvtBytes);
+};
+
+// Per-conversion state for encoding Unicode as ISCII Devanagari bytes.
+struct UnicodeToIsciiDevanagari
+{
+    // Last code point that convert() encoded through the range table, or 0
+    // after a special sequence; convert() checks it to recognise ZWNJ/ZWJ
+    // directly following a halant (U+094D).
+    sal_Unicode m_cPrevChar;
+    // High surrogate carried over from one convert() call to the next, or 0.
+    sal_Unicode m_cHighSurrogate;
+
+    UnicodeToIsciiDevanagari() { reset(); }
+
+    // Clears both pieces of carried-over state.
+    void reset()
+    {
+        m_cHighSurrogate = 0;
+        m_cPrevChar = 0;
+    }
+
+    // Defined out of line further down in this file.
+    sal_Size convert(sal_Unicode const* pSrcBuf, sal_Size nSrcChars,
+        char* pDestBuf, sal_Size nDestBytes, sal_uInt32 nFlags,
+        sal_uInt32 * pInfo, sal_Size * pSrcCvtChars);
+};
+
+// Lookup table from a single ISCII Devanagari byte (the array index) to a BMP
+// code point. Bytes 0x00-0x7F map to themselves; 0xFFFF marks a byte that has
+// no single-byte mapping, which the decoder treats as undefined input.
+static const sal_Unicode IsciiDevanagariMap[256] =
+{
+    0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
+    0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
+    0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
+    0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
+    0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
+    0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
+    0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
+    0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
+    0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
+    0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
+    0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
+    0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
+    0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
+    0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
+    0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
+    0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
+    0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
+    0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
+    0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
+    0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
+    0xFFFF,0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,
+    0x0909,0x090A,0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,
+    0x0913,0x0914,0x0911,0x0915,0x0916,0x0917,0x0918,0x0919,
+    0x091A,0x091B,0x091C,0x091D,0x091E,0x091F,0x0920,0x0921,
+    0x0922,0x0923,0x0924,0x0925,0x0926,0x0927,0x0928,0x0929,
+    0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,0x095F,0x0930,
+    0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,0x0938,
+    0x0939,0xFFFF,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
+    0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,
+    0x094D,0x093C,0x0964,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
+    0xFFFF,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
+    0x096D,0x096E,0x096F,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF
+};
+
+// Decodes ISCII Devanagari bytes from pSrcBuf into UTF-16 units in pDestBuf.
+//
+// pSrcBuf/nSrcBytes    source bytes to decode
+// pDestBuf/nDestChars  destination buffer and its capacity in sal_Unicode units
+// nFlags               passed on to handleBadInputTextToUnicodeConversion for
+//                      bytes that have no mapping
+// pInfo                if non-null, receives the RTL_TEXTTOUNICODE_INFO_* bits
+// pSrcCvtBytes         if non-null, receives the count of source bytes consumed
+//
+// Returns the number of sal_Unicode units written to pDestBuf.
+//
+// m_cPrevChar carries over between calls, so a halant (0xE8) that ends one
+// buffer still combines with an 0xE8/0xE9 byte that starts the next. A
+// <byte> + nukta pair split across two calls is not recombined, because the
+// lookahead never reads beyond nSrcBytes.
+sal_Size IsciiDevanagariToUnicode::convert(
+    char const* pSrcBuf, sal_Size nSrcBytes,
+    sal_Unicode* pDestBuf, sal_Size nDestChars, sal_uInt32 nFlags,
+    sal_uInt32* pInfo, sal_Size* pSrcCvtBytes)
+{
+    sal_uInt32 nInfo = 0;
+    sal_Size nConverted = 0;
+    sal_Unicode* pDestBufPtr = pDestBuf;
+    sal_Unicode* pDestBufEnd = pDestBuf + nDestChars;
+
+    while (nConverted < nSrcBytes)
+    {
+        if (pDestBufPtr == pDestBufEnd)
+        {
+            nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+            break;
+        }
+
+        sal_Unicode cChar = 0xFFFF;
+        sal_uInt8 nIn = static_cast<sal_uInt8>(pSrcBuf[nConverted]);
+        // Peek at the following byte only when it lies inside the buffer; the
+        // last byte of the input has no lookahead.
+        sal_uInt8 nNext = nConverted + 1 < nSrcBytes
+            ? static_cast<sal_uInt8>(pSrcBuf[nConverted + 1]) : 0;
+        bool bNormal = true;
+        bool bDouble = false;
+        // halant + halant  E8 E8  ->  halant + ZWNJ  094D 200C
+        // halant + nukta   E8 E9  ->  halant + ZWJ   094D 200D
+        if (m_cPrevChar == 0xE8 && nIn == 0xE8)
+        {
+            cChar = 0x200C;
+            bNormal = false;
+        }
+        else if (m_cPrevChar == 0xE8 && nIn == 0xE9)
+        {
+            cChar = 0x200D;
+            bNormal = false;
+        }
+        else if (nNext == 0xE9)
+        {
+            // <byte> + nukta (0xE9) selects a single code point of its own.
+            bNormal = false;
+            bDouble = true;
+            switch (nIn)
+            {
+                case 0xA1: cChar = 0x0950; break;
+                case 0xA6: cChar = 0x090C; break;
+                case 0xA7: cChar = 0x0961; break;
+                case 0xAA: cChar = 0x0960; break;
+                case 0xB3: cChar = 0x0958; break;
+                case 0xB4: cChar = 0x0959; break;
+                case 0xB5: cChar = 0x095A; break;
+                case 0xBA: cChar = 0x095B; break;
+                case 0xBF: cChar = 0x095C; break;
+                case 0xC0: cChar = 0x095D; break;
+                case 0xC9: cChar = 0x095E; break;
+                case 0xDB: cChar = 0x0962; break;
+                case 0xDC: cChar = 0x0963; break;
+                case 0xDF: cChar = 0x0944; break;
+                case 0xEA: cChar = 0x093D; break;
+                default:
+                    // Not a nukta pair after all: decode nIn through the table.
+                    bNormal = true;
+                    bDouble = false;
+                    break;
+            }
+        }
+
+        // Plain single-byte lookup; 0xFFFF in the table means "no mapping".
+        if (bNormal)
+            cChar = IsciiDevanagariMap[nIn];
+
+        bool bUndefined = cChar == 0xffff;
+
+        if (bUndefined)
+        {
+            BadInputConversionAction eAction = handleBadInputTextToUnicodeConversion(
+                        bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
+                        &nInfo);
+            if (eAction == BAD_INPUT_CONTINUE)
+            {
+                // Step over the offending byte; otherwise the loop would
+                // re-read the very same byte on every iteration.
+                ++nConverted;
+                continue;
+            }
+            // STOP and NO_OUTPUT both end the conversion at this byte.
+            if (eAction == BAD_INPUT_STOP)
+                break;
+            else if (eAction == BAD_INPUT_NO_OUTPUT)
+            {
+                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+                break;
+            }
+        }
+        // Consume the byte, plus the nukta that was folded into it.
+        ++nConverted;
+        if (bDouble)
+            ++nConverted;
+
+        *pDestBufPtr++ = cChar;
+        // Only a table-decoded byte can open a halant sequence.
+        m_cPrevChar = bNormal ? nIn : 0;
+    }
+
+    if (pInfo)
+        *pInfo = nInfo;
+    if (pSrcCvtBytes)
+        *pSrcCvtBytes = nConverted;
+
+    return pDestBufPtr - pDestBuf;
+}
+
+// Range table used when encoding Unicode as ISCII. Each entry is
+// { first code point, number of further consecutive code points covered,
+//   ISCII byte assigned to the first code point }.
+// Entries are listed in ascending code point order; the encoder stops
+// searching as soon as it meets an entry that starts above the code point
+// it is looking for.
+BmpUnicodeToSingleByteRange const unicodeToISCIIEncoding[] =
+{
+    { 0x0000, 0x007F - 0x0000, 0x00 }, { 0x0901, 0x0903 - 0x0901, 0xA1 },
+    { 0x0905, 0x090B - 0x0905, 0xA4 }, { 0x090D, 0x090D - 0x090D, 0xAE },
+    { 0x090E, 0x0910 - 0x090E, 0xAB }, { 0x0911, 0x0911 - 0x0911, 0xB2 },
+    { 0x0912, 0x0914 - 0x0912, 0xAF }, { 0x0915, 0x092F - 0x0915, 0xB3 },
+    { 0x0930, 0x0939 - 0x0930, 0xCF }, { 0x093C, 0x093C - 0x093C, 0xE9 },
+    { 0x093E, 0x0943 - 0x093E, 0xDA }, { 0x0945, 0x0945 - 0x0945, 0xE3 },
+    { 0x0946, 0x0948 - 0x0946, 0xE0 }, { 0x0949, 0x0949 - 0x0949, 0xE7 },
+    { 0x094A, 0x094C - 0x094A, 0xE4 }, { 0x094D, 0x094D - 0x094D, 0xE8 },
+    { 0x095F, 0x095F - 0x095F, 0xCE }, { 0x0964, 0x0964 - 0x0964, 0xEA },
+    { 0x0966, 0x096F - 0x0966, 0xF1 }
+};
+
+// Encodes UTF-16 code units from pSrcBuf as ISCII Devanagari bytes in pDestBuf.
+//
+// Returns the number of bytes written to pDestBuf. When the pointers are
+// non-null, *pInfo receives the RTL_UNICODETOTEXT_INFO_* bits raised during
+// the call and *pSrcCvtChars the number of source units consumed. The
+// pending-high-surrogate state is read from m_cHighSurrogate on entry and
+// written back on exit; m_cPrevChar is updated as characters are emitted.
+sal_Size UnicodeToIsciiDevanagari::convert(sal_Unicode const* pSrcBuf, sal_Size nSrcChars,
+    char* pDestBuf, sal_Size nDestBytes, sal_uInt32 nFlags,
+    sal_uInt32 * pInfo, sal_Size* pSrcCvtChars)
+{
+    size_t entries = SAL_N_ELEMENTS(unicodeToISCIIEncoding);
+    BmpUnicodeToSingleByteRange const * ranges = unicodeToISCIIEncoding;
+
+    sal_Unicode cHighSurrogate = m_cHighSurrogate;
+    sal_uInt32 nInfo = 0;
+    sal_Size nConverted = 0;
+    sal_Char* pDestBufPtr = pDestBuf;
+    sal_Char* pDestBufEnd = pDestBuf + nDestBytes;
+    for (; nConverted < nSrcChars; ++nConverted)
+    {
+        // bUndefined is handed to the bad-input handler: it stays true for a
+        // character without an ISCII mapping and is cleared for malformed input.
+        bool bUndefined = true;
+        sal_uInt32 c = *pSrcBuf++;
+        sal_Char cSpecialChar = 0;
+        // Surrogates: remember a high surrogate and wait for the next unit,
+        // combine it with a following low surrogate, and treat anything else
+        // that follows a high surrogate as malformed input.
+        if (cHighSurrogate == 0)
+        {
+            if (ImplIsHighSurrogate(c))
+            {
+                cHighSurrogate = static_cast< sal_Unicode >(c);
+                continue;
+            }
+        }
+        else if (ImplIsLowSurrogate(c))
+        {
+            c = ImplCombineSurrogates(cHighSurrogate, c);
+        }
+        else
+        {
+            bUndefined = false;
+            goto bad_input;
+        }
+        // A low surrogate without a preceding high one, or a noncharacter, is
+        // malformed as well.
+        if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c))
+        {
+            bUndefined = false;
+            goto bad_input;
+        }
+
+        // halant + ZWNJ  094D 200C  ->  halant + halant  E8 E8
+        // halant + ZWJ   094D 200D  ->  halant + nukta   E8 E9
+        // The halant itself was already written when it was encountered, so
+        // only the second byte is emitted here.
+        if (m_cPrevChar == 0x094D && c == 0x200C)
+            cSpecialChar = 0xE8;
+        else if (m_cPrevChar == 0x094D && c == 0x200D)
+            cSpecialChar = 0xE9;
+        if (cSpecialChar)
+        {
+            if (pDestBufEnd - pDestBufPtr < 1)
+            {
+                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+                break;
+            }
+            *pDestBufPtr++ = cSpecialChar;
+            m_cPrevChar = 0;
+            goto done;
+        }
+        // Code points that are written as a base byte followed by nukta (0xE9).
+        switch (c)
+        {
+            case 0x0950:
+                cSpecialChar = 0xA1;
+                break;
+            case 0x090C:
+                cSpecialChar = 0xA6;
+                break;
+            case 0x0961:
+                cSpecialChar = 0xA7;
+                break;
+            case 0x0960:
+                cSpecialChar = 0xAA;
+                break;
+            case 0x0958:
+                cSpecialChar = 0xB3;
+                break;
+            case 0x0959:
+                cSpecialChar = 0xB4;
+                break;
+            case 0x095A:
+                cSpecialChar = 0xB5;
+                break;
+            case 0x095B:
+                cSpecialChar = 0xBA;
+                break;
+            case 0x095C:
+                cSpecialChar = 0xBF;
+                break;
+            case 0x095D:
+                cSpecialChar = 0xC0;
+                break;
+            case 0x095E:
+                cSpecialChar = 0xC9;
+                break;
+            case 0x0962:
+                cSpecialChar = 0xDB;
+                break;
+            case 0x0963:
+                cSpecialChar = 0xDC;
+                break;
+            case 0x0944:
+                cSpecialChar = 0xDF;
+                break;
+            case 0x093D:
+                cSpecialChar = 0xEA;
+                break;
+            default:
+                break;
+        }
+        if (cSpecialChar)
+        {
+            // Both bytes of the pair must fit; nothing is written otherwise.
+            if (pDestBufEnd - pDestBufPtr < 2)
+            {
+                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+                break;
+            }
+            *pDestBufPtr++ = cSpecialChar;
+            *pDestBufPtr++ = 0xE9;
+            m_cPrevChar = 0;
+            goto done;
+        }
+
+        // Linearly searching through the ranges is probably fastest, assuming
+        // that most converted characters belong to the ASCII subset:
+        for (size_t i = 0; i < entries; ++i)
+        {
+            if (c < ranges[i].unicode)
+            {
+                // Every remaining entry starts above c, so c has no mapping.
+                break;
+            }
+            else if (c <= sal::static_int_cast< sal_uInt32 >(
+                           ranges[i].unicode + ranges[i].range))
+            {
+                if (pDestBufEnd - pDestBufPtr < 1)
+                {
+                    goto no_output;
+                }
+                *pDestBufPtr++ = static_cast< sal_Char >(
+                    ranges[i].byte + (c - ranges[i].unicode));
+                // Remembered so that a following ZWNJ/ZWJ can be recognised.
+                m_cPrevChar = c;
+                goto done;
+            }
+        }
+        goto bad_input;
+    done:
+        // A character was written; any pending high surrogate has been used up.
+        cHighSurrogate = 0;
+        continue;
+    bad_input:
+        // The shared handler decides, based on nFlags, how to proceed.
+        switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
+                    bUndefined, c, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo, 0,
+                    0, 0))
+        {
+        case sal::detail::textenc::BAD_INPUT_STOP:
+            cHighSurrogate = 0;
+            break;
+
+        case sal::detail::textenc::BAD_INPUT_CONTINUE:
+            cHighSurrogate = 0;
+            continue;
+
+        case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
+            goto no_output;
+        }
+        // Reached for BAD_INPUT_STOP: leave the conversion loop.
+        break;
+    no_output:
+        // The current character is not counted as converted: the loop is left
+        // before nConverted is incremented.
+        --pSrcBuf;
+        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+        break;
+    }
+
+    // The input ended with a high surrogate still pending and no error or
+    // full-buffer condition has been recorded.
+    if (cHighSurrogate != 0
+        && ((nInfo
+             & (RTL_UNICODETOTEXT_INFO_ERROR
+                | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
+            == 0))
+    {
+        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
+        {
+            nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
+        }
+        else
+        {
+            switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
+                        false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo, 0,
+                        0, 0))
+            {
+            case sal::detail::textenc::BAD_INPUT_STOP:
+            case sal::detail::textenc::BAD_INPUT_CONTINUE:
+                cHighSurrogate = 0;
+                break;
+
+            case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
+                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+                break;
+            }
+        }
+    }
+    // Persist the surrogate state for the next call.
+    m_cHighSurrogate = cHighSurrogate;
+    if (pInfo)
+        *pInfo = nInfo;
+    if (pSrcCvtChars)
+        *pSrcCvtChars = nConverted;
+
+    return pDestBufPtr - pDestBuf;
+}
+
+// Free-function entry point for ISCII -> Unicode conversion: forwards every
+// argument to the IsciiDevanagariToUnicode object behind pContext. The
+// unnamed first parameter is ignored.
+sal_Size ImplConvertIsciiDevanagariToUnicode(void const*,
+    void* pContext, char const* pSrcBuf, sal_Size nSrcBytes,
+    sal_Unicode* pDestBuf, sal_Size nDestChars, sal_uInt32 nFlags,
+    sal_uInt32* pInfo, sal_Size* pSrcCvtBytes)
+{
+    return static_cast<IsciiDevanagariToUnicode*>(pContext)->convert(
+        pSrcBuf, nSrcBytes, pDestBuf, nDestChars, nFlags, pInfo,
+        pSrcCvtBytes);
+}
+
+// Free-function entry point for Unicode -> ISCII conversion: forwards every
+// argument to the UnicodeToIsciiDevanagari object behind pContext. The
+// unnamed first parameter is ignored.
+sal_Size ImplConvertUnicodeToIsciiDevanagari(void const*,
+    void * pContext, sal_Unicode const * pSrcBuf, sal_Size nSrcChars,
+    char * pDestBuf, sal_Size nDestBytes, sal_uInt32 nFlags,
+    sal_uInt32 * pInfo, sal_Size * pSrcCvtChars)
+{
+    return static_cast<UnicodeToIsciiDevanagari*>(pContext)->convert(
+        pSrcBuf, nSrcChars, pDestBuf, nDestBytes, nFlags, pInfo,
+        pSrcCvtChars);
+}
+
+// Allocates a fresh decoder state and returns it as an opaque pointer; it is
+// released again by ImplDestroyIsciiDevanagariToUnicodeContext.
+void *ImplCreateIsciiDevanagariToUnicodeContext()
+{
+    IsciiDevanagariToUnicode* pState = new IsciiDevanagariToUnicode;
+    return pState;
+}
+
+// Frees a decoder state; pContext is cast back to its real type so that the
+// delete expression operates on an IsciiDevanagariToUnicode.
+void ImplDestroyIsciiDevanagariToUnicodeContext(void * pContext)
+{
+    delete static_cast<IsciiDevanagariToUnicode*>(pContext);
+}
+
+// Returns the decoder state behind pContext to its initial condition.
+void ImplResetIsciiDevanagariToUnicodeContext(void * pContext)
+{
+    static_cast<IsciiDevanagariToUnicode*>(pContext)->reset();
+}
+
+// Allocates a fresh encoder state and returns it as an opaque pointer; it is
+// released again by ImplDestroyUnicodeToIsciiDevanagariContext.
+void *ImplCreateUnicodeToIsciiDevanagariContext()
+{
+    UnicodeToIsciiDevanagari* pState = new UnicodeToIsciiDevanagari;
+    return pState;
+}
+
+// Returns the encoder state behind pContext to its initial condition.
+void ImplResetUnicodeToIsciiDevanagariContext(void * pContext)
+{
+    static_cast<UnicodeToIsciiDevanagari*>(pContext)->reset();
+}
+
+// Frees an encoder state; pContext is cast back to its real type so that the
+// delete expression operates on a UnicodeToIsciiDevanagari.
+void ImplDestroyUnicodeToIsciiDevanagariContext(void * pContext)
+{
+    delete static_cast<UnicodeToIsciiDevanagari*>(pContext);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/textenc/convertisciidevangari.hxx b/sal/textenc/convertisciidevangari.hxx
new file mode 100644
index 0000000..b3f5f30
--- /dev/null
+++ b/sal/textenc/convertisciidevangari.hxx
@@ -0,0 +1,34 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/types.h>
+
+// Converts ISCII Devanagari bytes to Unicode using the state object that
+// pContext points to. Returns the number of sal_Unicode units written.
+sal_Size ImplConvertIsciiDevanagariToUnicode(void const * pData,
+    void * pContext, char const * pSrcBuf, sal_Size nSrcBytes,
+    sal_Unicode * pDestBuf, sal_Size nDestChars, sal_uInt32 nFlags,
+    sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes);
+
+// Converts Unicode to ISCII Devanagari bytes using the state object that
+// pContext points to. Returns the number of bytes written.
+sal_Size ImplConvertUnicodeToIsciiDevanagari(void const * pData,
+    void * pContext, sal_Unicode const * pSrcBuf, sal_Size nSrcChars,
+    char * pDestBuf, sal_Size nDestBytes, sal_uInt32 nFlags,
+    sal_uInt32 * pInfo, sal_Size * pSrcCvtChars);
+
+// Creates the state object expected by ImplConvertIsciiDevanagariToUnicode.
+void *ImplCreateIsciiDevanagariToUnicodeContext();
+
+// Deletes a state object created by ImplCreateIsciiDevanagariToUnicodeContext.
+void ImplDestroyIsciiDevanagariToUnicodeContext(void * pContext);
+
+// Puts an ISCII -> Unicode state object back into its initial state.
+void ImplResetIsciiDevanagariToUnicodeContext(void * pContext);
+
+// Creates the state object expected by ImplConvertUnicodeToIsciiDevanagari.
+void *ImplCreateUnicodeToIsciiDevanagariContext();
+
+// Puts a Unicode -> ISCII state object back into its initial state.
+void ImplResetUnicodeToIsciiDevanagariContext(void * pContext);
+
+// Deletes a state object created by ImplCreateUnicodeToIsciiDevanagariContext.
+void ImplDestroyUnicodeToIsciiDevanagariContext(void * pContext);
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/textenc/convertisciidevangari.tab b/sal/textenc/convertisciidevangari.tab
new file mode 100644
index 0000000..6c4a24b
--- /dev/null
+++ b/sal/textenc/convertisciidevangari.tab
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "sal/config.h"
+
+#include "rtl/tencinfo.h"
+#include "rtl/textenc.h"
+
+#include "convertisciidevangari.hxx"
+
+// Encoding descriptor for ISCII Devanagari: the conversion and context
+// callbacks declared in convertisciidevangari.hxx, followed by the encoding's
+// scalar properties.
+// NOTE(review): the meaning of the positional fields after the callback group
+// depends on the declaration of ImplTextEncodingData, which is not visible
+// here — confirm against that declaration before changing any value.
+static ImplTextEncodingData const aImplIsciiDevanagariTextEncodingData
+    = { { NULL,
+          &ImplConvertIsciiDevanagariToUnicode,
+          &ImplConvertUnicodeToIsciiDevanagari,
+          &ImplCreateIsciiDevanagariToUnicodeContext,
+          &ImplDestroyIsciiDevanagariToUnicodeContext,
+          &ImplResetIsciiDevanagariToUnicodeContext,
+          &ImplCreateUnicodeToIsciiDevanagariContext,
+          &ImplResetUnicodeToIsciiDevanagariContext,
+          &ImplDestroyUnicodeToIsciiDevanagariContext },
+        1,
+        2,
+        2,
+        1,
+        NULL,
+        // NOTE(review): this literal ends in a space character — confirm that
+        // is intended and not an accident.
+        "x-iscii-de ",
+        RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE };
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/textenc/tables.cxx b/sal/textenc/tables.cxx
index 7400abb..bdb391e 100644
--- a/sal/textenc/tables.cxx
+++ b/sal/textenc/tables.cxx
@@ -100,13 +100,14 @@ static sal_uInt16 const aImplDoubleByteIdentifierTab[1] = { 0 };
 #include "tcvttcn6.tab"
 #include "tcvtuni1.tab"
 
+#include "convertadobe.tab"
 #include "convertbig5hkscs.tab"
 #include "converteuctw.tab"
 #include "convertgb18030.tab"
+#include "convertisciidevangari.tab"
 #include "convertiso2022cn.tab"
 #include "convertiso2022jp.tab"
 #include "convertiso2022kr.tab"
-#include "convertadobe.tab"
 
 extern "C" SAL_DLLPUBLIC_EXPORT ImplTextEncodingData const *
     sal_getFullTextEncodingData( rtl_TextEncoding nEncoding )
@@ -208,7 +209,7 @@ extern "C" SAL_DLLPUBLIC_EXPORT ImplTextEncodingData const *
             &aImplBig5HkscsTextEncodingData, /* BIG5_HKSCS */
             &aImplTis620TextEncodingData, /* TIS_620 */
             &aImplKoi8UTextEncodingData, /* KOI8_U */
-            NULL, /* TODO! ISCII_DEVANAGARI */
+            &aImplIsciiDevanagariTextEncodingData, /* ISCII_DEVANAGARI */
             NULL, /* JAVA_UTF8, see above */
             &adobeStandardEncodingData, /* ADOBE_STANDARD */
             &adobeSymbolEncodingData, /* ADOBE_SYMBOL */


More information about the Libreoffice-commits mailing list