[ooo-build-commit] .: sal/rtl sal/textenc

Tue Sep 28 08:29:38 PDT 2010

sal/rtl/source/ustring.c |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 sal/textenc/tcvtbyte.c   |   45 +++++++++++++++++++++++++++++++++++++++++++++
 sal/textenc/tcvtlat1.tab |   16 ++++++++--------
 sal/textenc/tenchelp.h   |    5 +++++
 4 files changed, 104 insertions(+), 8 deletions(-)

New commits:
commit 83cefec42b049c78d79bd6d040f1fb68a804b02a
Author: Kohei Yoshida <kyoshida at novell.com>
Date:   Tue Sep 28 11:26:03 2010 -0400

    Ported sal-strintern-speed-char-upper.diff from ooo-build.

diff --git a/sal/rtl/source/ustring.c b/sal/rtl/source/ustring.c
index 8360322..0c1603f 100644
--- a/sal/rtl/source/ustring.c
+++ b/sal/rtl/source/ustring.c
@@ -800,6 +800,29 @@ void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
     }
 }
 
+/* Returns the number of Unicode characters that len bytes of eTextEncoding
+   text convert to, or 0 when that cannot be told from the input length. */
+static int rtl_canGuessUOutputLength( int len, rtl_TextEncoding eTextEncoding )
+{
+    // FIXME: Maybe we should use a bit flag in the higher bits of the
+    // eTextEncoding value itself to determine the encoding type.  But if we
+    // do, be sure to mask the value in certain places that expect the values
+    // to be numbered serially from 0 and up.  One such place is
+    // Impl_getTextEncodingData().
+    switch ( eTextEncoding )
+    {
+        // 1 to 1 (with no zero elements)
+        case RTL_TEXTENCODING_IBM_437:
+        case RTL_TEXTENCODING_IBM_850:
+        case RTL_TEXTENCODING_IBM_860:
+        case RTL_TEXTENCODING_IBM_861:
+        case RTL_TEXTENCODING_IBM_863:
+        case RTL_TEXTENCODING_IBM_865:
+            return len;
+        default:
+            return 0;
+    }
+}
+
 void SAL_CALL rtl_uString_internConvert( rtl_uString   ** newStr,
                                          const sal_Char * str,
                                          sal_Int32        len,
@@ -817,6 +840,7 @@ void SAL_CALL rtl_uString_internConvert( rtl_uString   ** newStr,
 
     if ( len < 256 )
     { // try various optimisations
+        sal_Int32 ulen;
         if ( len < 0 )
             len = strlen( str );
         if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
@@ -836,6 +860,28 @@ void SAL_CALL rtl_uString_internConvert( rtl_uString   ** newStr,
             rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
             return;
         }
+        else if ( (ulen = rtl_canGuessUOutputLength(len, eTextEncoding)) != 0 )
+        {
+            rtl_uString *pScratch;
+            rtl_TextToUnicodeConverter hConverter;
+            sal_Size nDestChars, nSrcBytes;
+            sal_uInt32 nInfo;
+
+            pScratch = alloca( sizeof(rtl_uString) + ulen * sizeof (IMPL_RTL_STRCODE) );
+
+            hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
+            nDestChars = rtl_convertTextToUnicode(
+                hConverter, 0, str, len, pScratch->buffer, ulen, convertFlags, &nInfo, &nSrcBytes );
+            rtl_destroyTextToUnicodeConverter( hConverter );
+
+            if (pInfo)
+                *pInfo = nInfo;
+
+            pScratch->length = ulen;
+            rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
+            return;
+        }
+
         /* FIXME: we want a nice UTF-8 / alloca shortcut here */
     }
 
diff --git a/sal/textenc/tcvtbyte.c b/sal/textenc/tcvtbyte.c
index 6f211ea..4d40b0a 100644
--- a/sal/textenc/tcvtbyte.c
+++ b/sal/textenc/tcvtbyte.c
@@ -640,6 +640,51 @@ sal_Size ImplCharToUnicode( const ImplTextConverterData* pData,
 
 /* ----------------------------------------------------------------------- */
 
+/* Converts single-byte text to Unicode: bytes below 0x80 map to themselves,
+   bytes 0x80..0xFF are looked up in mpToUniTab1.  Stores status flags in
+   *pInfo and the consumed byte count in *pSrcCvtBytes; returns the number of
+   characters written, which never exceeds nDestChars. */
+sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData,
+                            void* pContext,
+                            const sal_Char* pSrcBuf, sal_Size nSrcBytes,
+                            sal_Unicode* pDestBuf, sal_Size nDestChars,
+                            sal_uInt32 nFlags, sal_uInt32* pInfo,
+                            sal_Size* pSrcCvtBytes )
+{
+    sal_uChar                   c;
+    const ImplByteConvertData*  pConvertData = (const ImplByteConvertData*)pData;
+    sal_Unicode*                pEndDestBuf;
+    const sal_Char*             pEndSrcBuf;
+
+    (void) pContext; /* unused */
+    (void) nFlags;   /* unused */
+
+    *pInfo = 0;
+    pEndDestBuf = pDestBuf+nDestChars;
+    pEndSrcBuf  = pSrcBuf+nSrcBytes;
+    /* An empty destination is reported as an error even for empty input. */
+    if ( pDestBuf == pEndDestBuf )
+    {
+        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+        *pSrcCvtBytes = 0;
+        return 0;
+    }
+    while ( pSrcBuf < pEndSrcBuf )
+    {
+        if ( pDestBuf == pEndDestBuf )
+        {   /* destination full: stop here rather than write past its end */
+            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+            break;
+        }
+        c = (sal_uChar)*pSrcBuf++;
+        *pDestBuf++ = (sal_Unicode)( (c < 0x80) ? c : pConvertData->mpToUniTab1[c - 0x80] );
+    }
+    *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
+    return (nDestChars - (pEndDestBuf-pDestBuf));
+}
+
+/* ----------------------------------------------------------------------- */
+
 // Writes 0--2 characters to dest:
 static int ImplConvertUnicodeCharToChar(
     const ImplByteConvertData* pConvertData, sal_Unicode c, sal_Char * dest )
diff --git a/sal/textenc/tcvtlat1.tab b/sal/textenc/tcvtlat1.tab
index cf47cec..9c0a0e3 100644
--- a/sal/textenc/tcvtlat1.tab
+++ b/sal/textenc/tcvtlat1.tab
@@ -188,7 +188,7 @@ static ImplByteConvertData const aImplIBM437ByteCvtData =
 
 static ImplTextEncodingData const aImplIBM437TextEncodingData
     = { { &aImplIBM437ByteCvtData,
-          ImplCharToUnicode,
+          ImplUpperCharToUnicode,
           ImplUnicodeToChar,
           NULL,
           NULL,
@@ -322,7 +322,7 @@ static ImplByteConvertData const aImplIBM850ByteCvtData =
 
 static ImplTextEncodingData const aImplIBM850TextEncodingData
     = { { &aImplIBM850ByteCvtData,
-          ImplCharToUnicode,
+          ImplUpperCharToUnicode,
           ImplUnicodeToChar,
           NULL,
           NULL,
@@ -495,7 +495,7 @@ static ImplByteConvertData const aImplIBM860ByteCvtData =
 
 static ImplTextEncodingData const aImplIBM860TextEncodingData
     = { { &aImplIBM860ByteCvtData,
-          ImplCharToUnicode,
+          ImplUpperCharToUnicode,
           ImplUnicodeToChar,
           NULL,
           NULL,
@@ -670,7 +670,7 @@ static ImplByteConvertData const aImplIBM861ByteCvtData =
 
 static ImplTextEncodingData const aImplIBM861TextEncodingData
     = { { &aImplIBM861ByteCvtData,
-          ImplCharToUnicode,
+          ImplUpperCharToUnicode,
           ImplUnicodeToChar,
           NULL,
           NULL,
@@ -845,7 +845,7 @@ static ImplByteConvertData const aImplIBM863ByteCvtData =
 
 static ImplTextEncodingData const aImplIBM863TextEncodingData
     = { { &aImplIBM863ByteCvtData,
-          ImplCharToUnicode,
+          ImplUpperCharToUnicode,
           ImplUnicodeToChar,
           NULL,
           NULL,
@@ -1020,7 +1020,7 @@ static ImplByteConvertData const aImplIBM865ByteCvtData =
 
 static ImplTextEncodingData const aImplIBM865TextEncodingData
     = { { &aImplIBM865ByteCvtData,
-          ImplCharToUnicode,
+          ImplUpperCharToUnicode,
           ImplUnicodeToChar,
           NULL,
           NULL,
@@ -1560,7 +1560,7 @@ static ImplByteConvertData const aImplAPPLEICELANDByteCvtData =
 
 static ImplTextEncodingData const aImplAPPLEICELANDTextEncodingData
     = { { &aImplAPPLEICELANDByteCvtData,
-          ImplCharToUnicode,
+          ImplUpperCharToUnicode,
           ImplUnicodeToChar,
           NULL,
           NULL,
@@ -1707,7 +1707,7 @@ static ImplByteConvertData const aImplAPPLEROMANByteCvtData =
 
 static ImplTextEncodingData const aImplAPPLEROMANTextEncodingData
     = { { &aImplAPPLEROMANByteCvtData,
-          ImplCharToUnicode,
+          ImplUpperCharToUnicode,
           ImplUnicodeToChar,
           NULL,
           NULL,
diff --git a/sal/textenc/tenchelp.h b/sal/textenc/tenchelp.h
index a756af3..895803a 100644
--- a/sal/textenc/tenchelp.h
+++ b/sal/textenc/tenchelp.h
@@ -230,6 +230,11 @@ sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, void* pContext,
                             const sal_Char* pSrcBuf, sal_Size nSrcBytes,
                             sal_Unicode* pDestBuf, sal_Size nDestChars,
                             sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes );
+/** Bytes below 0x80 are copied unchanged; bytes 0x80..0xFF go through mpToUniTab1. */
+sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData, void* pContext,
+                            const sal_Char* pSrcBuf, sal_Size nSrcBytes,
+                            sal_Unicode* pDestBuf, sal_Size nDestChars,
+                            sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes );
 sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, void* pContext,
                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                             sal_Char* pDestBuf, sal_Size nDestBytes,