[ooo-build-commit] .: sal/rtl sal/textenc
Kohei Yoshida
kohei at kemper.freedesktop.org
Tue Sep 28 08:29:38 PDT 2010
sal/rtl/source/ustring.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
sal/textenc/tcvtbyte.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
sal/textenc/tcvtlat1.tab | 16 ++++++++--------
sal/textenc/tenchelp.h | 5 +++++
4 files changed, 104 insertions(+), 8 deletions(-)
New commits:
commit 83cefec42b049c78d79bd6d040f1fb68a804b02a
Author: Kohei Yoshida <kyoshida at novell.com>
Date: Tue Sep 28 11:26:03 2010 -0400
Ported sal-strintern-speed-char-upper.diff from ooo-build.
diff --git a/sal/rtl/source/ustring.c b/sal/rtl/source/ustring.c
index 8360322..0c1603f 100644
--- a/sal/rtl/source/ustring.c
+++ b/sal/rtl/source/ustring.c
@@ -800,6 +800,29 @@ void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
}
}
+static int rtl_canGuessUOutputLength( int len, rtl_TextEncoding eTextEncoding )
+{ // Returns len when eTextEncoding is one of the encodings listed in the switch below, otherwise 0.
+ // FIXME: Maybe we should use a bit flag in the higher bits of the
+ // eTextEncoding value itself to determine the encoding type. But if we
+ // do, be sure to mask the value in certain places that expect the values
+ // to be numbered serially from 0 and up. One such place is
+ // Impl_getTextEncodingData().
+ // NOTE: 0 doubles as the "cannot guess" result, so a listed encoding with len == 0 is indistinguishable from an unlisted one.
+ switch ( eTextEncoding )
+ {
+ // 1 to 1: one output character per input byte (with no zero elements), so the output length equals len.
+ case RTL_TEXTENCODING_IBM_437:
+ case RTL_TEXTENCODING_IBM_850:
+ case RTL_TEXTENCODING_IBM_860:
+ case RTL_TEXTENCODING_IBM_861:
+ case RTL_TEXTENCODING_IBM_863:
+ case RTL_TEXTENCODING_IBM_865:
+ return len;
+ break; // never reached: the return above already left the function
+ }
+ return 0; // every encoding not listed above: length cannot be guessed
+}
+
void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
const sal_Char * str,
sal_Int32 len,
@@ -817,6 +840,7 @@ void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
if ( len < 256 )
{ // try various optimisations
+ sal_Int32 ulen;
if ( len < 0 )
len = strlen( str );
if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
@@ -836,6 +860,28 @@ void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
return;
}
+ else if ( (ulen = rtl_canGuessUOutputLength(len, eTextEncoding)) != 0 )
+ {
+ rtl_uString *pScratch;
+ rtl_TextToUnicodeConverter hConverter;
+ sal_Size nDestChars, nSrcBytes;
+ sal_uInt32 nInfo;
+
+ pScratch = alloca( sizeof(rtl_uString) + ulen * sizeof (IMPL_RTL_STRCODE) );
+
+ hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
+ nDestChars = rtl_convertTextToUnicode(
+ hConverter, 0, str, len, pScratch->buffer, ulen, convertFlags, &nInfo, &nSrcBytes );
+ rtl_destroyTextToUnicodeConverter( hConverter );
+
+ if (pInfo)
+ *pInfo = nInfo;
+
+ pScratch->length = ulen;
+ rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
+ return;
+ }
+
/* FIXME: we want a nice UTF-8 / alloca shortcut here */
}
diff --git a/sal/textenc/tcvtbyte.c b/sal/textenc/tcvtbyte.c
index 6f211ea..4d40b0a 100644
--- a/sal/textenc/tcvtbyte.c
+++ b/sal/textenc/tcvtbyte.c
@@ -640,6 +640,51 @@ sal_Size ImplCharToUnicode( const ImplTextConverterData* pData,
/* ----------------------------------------------------------------------- */
+sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData,
+ void* pContext,
+ const sal_Char* pSrcBuf, sal_Size nSrcBytes,
+ sal_Unicode* pDestBuf, sal_Size nDestChars,
+ sal_uInt32 nFlags, sal_uInt32* pInfo,
+ sal_Size* pSrcCvtBytes )
+{ // Converts each source byte to one sal_Unicode: values below 0x80 are copied as-is, the rest are looked up in mpToUniTab1.
+ sal_uChar c;
+ sal_Unicode cConv;
+ const ImplByteConvertData* pConvertData = (const ImplByteConvertData*)pData; // pData is used as an ImplByteConvertData
+ sal_Unicode* pEndDestBuf;
+ const sal_Char* pEndSrcBuf;
+ // Returns the number of characters written; *pSrcCvtBytes receives the number of source bytes consumed.
+ (void) pContext; /* unused */
+ (void) nFlags; /* unused */
+ // *pInfo and *pSrcCvtBytes are written unconditionally, so neither pointer may be NULL.
+ *pInfo = 0;
+ pEndDestBuf = pDestBuf+nDestChars;
+ pEndSrcBuf = pSrcBuf+nSrcBytes;
+ if ( pDestBuf == pEndDestBuf ) // zero-capacity destination: flag the error and convert nothing
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
+ *pSrcCvtBytes = 0;
+ return 0;
+ }
+ while ( pSrcBuf < pEndSrcBuf )
+ { // NOTE(review): only the source end is tested; pDestBuf is never compared with pEndDestBuf, so this relies on nDestChars >= nSrcBytes -- confirm callers guarantee it.
+ c = (sal_uChar)*pSrcBuf;
+ if (c < 0x80)
+ cConv = c;
+ else
+ // c <= 0xFF is implied, so the table index c - 0x80 lies in 0..0x7F.
+ cConv = pConvertData->mpToUniTab1[c - 0x80];
+ // Exactly one output character is stored for every input byte.
+ *pDestBuf = cConv;
+ pDestBuf++;
+ pSrcBuf++;
+ }
+ // Both counts are derived as "total minus remaining" from how far each pointer advanced.
+ *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
+ return (nDestChars - (pEndDestBuf-pDestBuf));
+}
+
+/* ----------------------------------------------------------------------- */
+
// Writes 0--2 characters to dest:
static int ImplConvertUnicodeCharToChar(
const ImplByteConvertData* pConvertData, sal_Unicode c, sal_Char * dest )
diff --git a/sal/textenc/tcvtlat1.tab b/sal/textenc/tcvtlat1.tab
index cf47cec..9c0a0e3 100644
--- a/sal/textenc/tcvtlat1.tab
+++ b/sal/textenc/tcvtlat1.tab
@@ -188,7 +188,7 @@ static ImplByteConvertData const aImplIBM437ByteCvtData =
static ImplTextEncodingData const aImplIBM437TextEncodingData
= { { &aImplIBM437ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -322,7 +322,7 @@ static ImplByteConvertData const aImplIBM850ByteCvtData =
static ImplTextEncodingData const aImplIBM850TextEncodingData
= { { &aImplIBM850ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -495,7 +495,7 @@ static ImplByteConvertData const aImplIBM860ByteCvtData =
static ImplTextEncodingData const aImplIBM860TextEncodingData
= { { &aImplIBM860ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -670,7 +670,7 @@ static ImplByteConvertData const aImplIBM861ByteCvtData =
static ImplTextEncodingData const aImplIBM861TextEncodingData
= { { &aImplIBM861ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -845,7 +845,7 @@ static ImplByteConvertData const aImplIBM863ByteCvtData =
static ImplTextEncodingData const aImplIBM863TextEncodingData
= { { &aImplIBM863ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -1020,7 +1020,7 @@ static ImplByteConvertData const aImplIBM865ByteCvtData =
static ImplTextEncodingData const aImplIBM865TextEncodingData
= { { &aImplIBM865ByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -1560,7 +1560,7 @@ static ImplByteConvertData const aImplAPPLEICELANDByteCvtData =
static ImplTextEncodingData const aImplAPPLEICELANDTextEncodingData
= { { &aImplAPPLEICELANDByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
@@ -1707,7 +1707,7 @@ static ImplByteConvertData const aImplAPPLEROMANByteCvtData =
static ImplTextEncodingData const aImplAPPLEROMANTextEncodingData
= { { &aImplAPPLEROMANByteCvtData,
- ImplCharToUnicode,
+ ImplUpperCharToUnicode,
ImplUnicodeToChar,
NULL,
NULL,
diff --git a/sal/textenc/tenchelp.h b/sal/textenc/tenchelp.h
index a756af3..895803a 100644
--- a/sal/textenc/tenchelp.h
+++ b/sal/textenc/tenchelp.h
@@ -230,6 +230,11 @@ sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, void* pContext,
const sal_Char* pSrcBuf, sal_Size nSrcBytes,
sal_Unicode* pDestBuf, sal_Size nDestChars,
sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes );
+/** For single-byte encodings that need a conversion table only for bytes 0x80 to 0xFF; bytes below 0x80 are converted to the same value. */
+sal_Size ImplUpperCharToUnicode( const ImplTextConverterData* pData, void* pContext,
+ const sal_Char* pSrcBuf, sal_Size nSrcBytes,
+ sal_Unicode* pDestBuf, sal_Size nDestChars,
+ sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes );
sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, void* pContext,
const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
sal_Char* pDestBuf, sal_Size nDestBytes,
More information about the ooo-build-commit
mailing list