[Libreoffice-commits] core.git: 3 commits - filter/source sal/rtl
Michael Stahl
mstahl at redhat.com
Thu Nov 6 09:51:44 PST 2014
filter/source/xmlfilterdetect/filterdetect.cxx | 7 ++++++-
sal/rtl/ustring.cxx | 24 +++++++++++++-----------
2 files changed, 19 insertions(+), 12 deletions(-)
New commits:
commit ba08fe174660dc4abdda0aec02dc52b9c7fdb7b5
Author: Michael Stahl <mstahl at redhat.com>
Date: Thu Nov 6 18:32:35 2014 +0100
filter: use more appropriate conversion from ASCII for arbitrary input
... with unknown encoding, mapping non-ASCII chars to something that
doesn't cause trouble (in FilterDetect::detect()).
Change-Id: Ibf2a2e2fd7c0c00e55042d2ccad173fab7a1b0bd
diff --git a/filter/source/xmlfilterdetect/filterdetect.cxx b/filter/source/xmlfilterdetect/filterdetect.cxx
index a6f0089..b4d614f 100644
--- a/filter/source/xmlfilterdetect/filterdetect.cxx
+++ b/filter/source/xmlfilterdetect/filterdetect.cxx
@@ -115,7 +115,12 @@ OUString SAL_CALL FilterDetect::detect( com::sun::star::uno::Sequence< com::sun:
}
if ( nUniPos == 3 || ( nUniPos == 0 && !bTryUtf16 ) ) // UTF-8 or non-Unicode
- resultString = OStringToOUString( read_uInt8s_ToOString( *pInStream, nSize ), RTL_TEXTENCODING_UTF8 );
+ {
+ OString const str(read_uInt8s_ToOString(*pInStream, nSize));
+ resultString = OUString(str.getStr(), str.getLength(),
+ RTL_TEXTENCODING_ASCII_US,
+ RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT);
+ }
else if ( nUniPos == 2 || bTryUtf16 ) // UTF-16
resultString = read_uInt16s_ToOUString( *pInStream, nSize );
commit aaccd8560e555d7e3cf062ca3672b77702397a70
Author: Michael Stahl <mstahl at redhat.com>
Date: Thu Nov 6 18:31:24 2014 +0100
sal: it is impossible to have non-ASCII char here
...because rtl_ImplGetFastUTF8UnicodeLen has already checked that.
Change-Id: I17f2b80f374073934a8f0b1a97099d4dec89ce4e
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index 1d30dcc..3e4f5c3 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -696,10 +696,7 @@ retry:
pBuffer = (*ppThis)->buffer;
do
{
- /* Check ASCII range */
- SAL_WARN_IF( ((unsigned char)*pStr) > 127, "rtl.string",
- "rtl_string2UString_status() - UTF8 test encoding is wrong" );
-
+ assert(((unsigned char)*pStr) <= 127);
*pBuffer = *pStr;
pBuffer++;
pStr++;
commit 5936a64c2aa275992ce231183b35711d8da74ace
Author: Michael Stahl <mstahl at redhat.com>
Date: Thu Nov 6 18:27:25 2014 +0100
sal: do not ignore conversion flags for RTL_TEXTENCODING_ASCII_US
Keep the fast path fast, fall back to the text encoder in case there's a
fly in the ointment.
Change-Id: I94507856a7f3170f770adb741aa1e282d0d2400c
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index c7622bd..1d30dcc 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -641,23 +641,28 @@ static void rtl_string2UString_status( rtl_uString** ppThis,
return;
}
pBuffer = (*ppThis)->buffer;
+ sal_Int32 nLenCopy(nLen);
+ const sal_Char *pStrCopy(pStr);
do
{
/* Check ASCII range */
- SAL_WARN_IF( ((unsigned char)*pStr) > 127, "rtl.string",
- "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
+ if (static_cast<unsigned char>(*pStrCopy) > 127)
+ {
+ rtl_uString_release(*ppThis);
+ goto retry; // cancel loop - try again with the converter
+ }
- *pBuffer = *pStr;
+ *pBuffer = *pStrCopy;
pBuffer++;
- pStr++;
- nLen--;
+ pStrCopy++;
+ nLenCopy--;
}
- while ( nLen );
+ while (nLenCopy);
if (pInfo != NULL) {
*pInfo = 0;
}
}
- else
+retry:
{
rtl_uString* pTemp;
rtl_uString* pTemp2 = NULL;
More information about the Libreoffice-commits mailing list