[poppler] poppler/UTF.cc qt5/tests qt6/tests
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Sat Jul 11 16:31:10 UTC 2020
poppler/UTF.cc | 2 -
qt5/tests/check_utf_conversion.cpp | 36 ++++++++++++++++++++----------
qt6/tests/check_utf_conversion.cpp | 44 ++++++++++++++++++++++++++++++++++++-
3 files changed, 68 insertions(+), 14 deletions(-)
New commits:
commit 969562d387b3791c7bc192a213e74049e08c9395
Author: Albert Astals Cid <aacid at kde.org>
Date: Sat Jul 11 00:41:13 2020 +0200
Fix UTF16LE support in TextStringToUCS4
Make test a bit more complex by using a nice checkbox
Also copy the test to the qt6 folder
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index ee0314f8..9097b312 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -119,7 +119,7 @@ int TextStringToUCS4(const GooString *textStr, Unicode **ucs4)
if (isUnicode)
utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff);
else // UnicodeLE
- utf16[i] = (s[2 + i * 2] & 0xff) | (s[3 + i * 2] & 0xff) >> 8;
+ utf16[i] = (s[3 + i * 2] & 0xff) << 8 | (s[2 + i * 2] & 0xff);
}
len = UTF16toUCS4(utf16, len, &u);
delete[] utf16;
diff --git a/qt5/tests/check_utf_conversion.cpp b/qt5/tests/check_utf_conversion.cpp
index 1f04c2a5..b153ae5b 100644
--- a/qt5/tests/check_utf_conversion.cpp
+++ b/qt5/tests/check_utf_conversion.cpp
@@ -43,7 +43,17 @@ static bool compare(const Unicode *a, const char *b, int len)
return false;
}
- return *a == (Unicode)*b;
+ return true;
+}
+
+static bool compare(const Unicode *a, const uint16_t *b, int len)
+{
+ for (int i = 0; i < len; i++) {
+ if (a[i] != b[i])
+ return false;
+ }
+
+ return true;
}
void TestUTFConversion::testUTF_data()
@@ -147,32 +157,34 @@ void TestUTFConversion::testUnicodeToAscii7()
void TestUTFConversion::testUnicodeLittleEndian()
{
- uint16_t UTF16LE_hi[4] { 0xFFFE, 0x4800, 0x4900, 0x2100 }; // UTF16-LE "HI!"
- GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), 4 * 2);
+ uint16_t UTF16LE_hi[5] { 0xFFFE, 0x4800, 0x4900, 0x2100, 0x1126 }; // UTF16-LE "HI!☑"
+ GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), sizeof(UTF16LE_hi));
- uint16_t UTF16BE_hi[4] { 0xFEFF, 0x0048, 0x0049, 0x0021 }; // UTF16-BE "HI!"
- GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), 4 * 2);
+ uint16_t UTF16BE_hi[5] { 0xFEFF, 0x0048, 0x0049, 0x0021, 0x2611 }; // UTF16-BE "HI!☑"
+ GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), sizeof(UTF16BE_hi));
// Let's assert both GooString's are different
- Q_ASSERT(GooUTF16LE.cmp(&GooUTF16BE) != 0);
+ QVERIFY(GooUTF16LE.cmp(&GooUTF16BE));
Unicode *UCS4fromLE, *UCS4fromBE;
const int len1 = TextStringToUCS4(&GooUTF16LE, &UCS4fromLE);
const int len2 = TextStringToUCS4(&GooUTF16BE, &UCS4fromBE);
- // 3 as TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points
- Q_ASSERT(len1 == len2);
- Q_ASSERT(len1 == 3);
+ // len is 4 because TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points
+ QCOMPARE(len1, len2);
+ QCOMPARE(len1, 4);
// Check that now after conversion, UCS4fromLE and UCS4fromBE are now the same
for (int i = 0; i < len1; i++) {
- Q_ASSERT(UCS4fromLE[i] == UCS4fromBE[i]);
+ QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]);
}
+ const QString expected = QStringLiteral("HI!☑");
+
// Do some final verifications, checking the strings to be "HI!☑"
QVERIFY(*UCS4fromLE == *UCS4fromBE);
- QVERIFY(compare(UCS4fromLE, "HI!", 3));
- QVERIFY(compare(UCS4fromBE, "HI!", 3));
+ QVERIFY(compare(UCS4fromLE, expected.utf16(), len1));
+ QVERIFY(compare(UCS4fromBE, expected.utf16(), len1));
}
QTEST_GUILESS_MAIN(TestUTFConversion)
diff --git a/qt6/tests/check_utf_conversion.cpp b/qt6/tests/check_utf_conversion.cpp
index f28829f4..f2a66096 100644
--- a/qt6/tests/check_utf_conversion.cpp
+++ b/qt6/tests/check_utf_conversion.cpp
@@ -18,6 +18,7 @@ private slots:
void testUTF_data();
void testUTF();
void testUnicodeToAscii7();
+ void testUnicodeLittleEndian();
};
static bool compare(const char *a, const char *b)
@@ -41,9 +42,18 @@ static bool compare(const Unicode *a, const char *b, int len)
return false;
}
- return *a == (Unicode)*b;
+ return true;
}
+static bool compare(const Unicode *a, const uint16_t *b, int len)
+{
+ for (int i = 0; i < len; i++) {
+ if (a[i] != b[i])
+ return false;
+ }
+
+ return true;
+}
void TestUTFConversion::testUTF_data()
{
QTest::addColumn<QString>("s");
@@ -143,5 +153,37 @@ void TestUTFConversion::testUnicodeToAscii7()
free(out_ascii_idx);
}
+void TestUTFConversion::testUnicodeLittleEndian()
+{
+ uint16_t UTF16LE_hi[5] { 0xFFFE, 0x4800, 0x4900, 0x2100, 0x1126 }; // UTF16-LE "HI!☑"
+ GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), sizeof(UTF16LE_hi));
+
+ uint16_t UTF16BE_hi[5] { 0xFEFF, 0x0048, 0x0049, 0x0021, 0x2611 }; // UTF16-BE "HI!☑"
+ GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), sizeof(UTF16BE_hi));
+
+ // Let's assert both GooString's are different
+ QVERIFY(GooUTF16LE.cmp(&GooUTF16BE));
+
+ Unicode *UCS4fromLE, *UCS4fromBE;
+ const int len1 = TextStringToUCS4(&GooUTF16LE, &UCS4fromLE);
+ const int len2 = TextStringToUCS4(&GooUTF16BE, &UCS4fromBE);
+
+ // len is 4 because TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points
+ QCOMPARE(len1, len2);
+ QCOMPARE(len1, 4);
+
+ // Check that now after conversion, UCS4fromLE and UCS4fromBE are now the same
+ for (int i = 0; i < len1; i++) {
+ QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]);
+ }
+
+ const QString expected = QStringLiteral("HI!☑");
+
+ // Do some final verifications, checking the strings to be "HI!☑"
+ QVERIFY(*UCS4fromLE == *UCS4fromBE);
+ QVERIFY(compare(UCS4fromLE, expected.utf16(), len1));
+ QVERIFY(compare(UCS4fromBE, expected.utf16(), len1));
+}
+
QTEST_GUILESS_MAIN(TestUTFConversion)
#include "check_utf_conversion.moc"
More information about the poppler
mailing list