[poppler] poppler/UTF.cc qt5/tests qt6/tests

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Jul 11 16:31:10 UTC 2020


 poppler/UTF.cc                     |    2 -
 qt5/tests/check_utf_conversion.cpp |   36 ++++++++++++++++++++----------
 qt6/tests/check_utf_conversion.cpp |   44 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 68 insertions(+), 14 deletions(-)

New commits:
commit 969562d387b3791c7bc192a213e74049e08c9395
Author: Albert Astals Cid <aacid at kde.org>
Date:   Sat Jul 11 00:41:13 2020 +0200

    Fix UTF16LE support in TextStringToUCS4
    
    Make test a bit more complex by using a nice checkbox
    
    Also copy the test to the qt6 folder

diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index ee0314f8..9097b312 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -119,7 +119,7 @@ int TextStringToUCS4(const GooString *textStr, Unicode **ucs4)
                 if (isUnicode)
                     utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff);
                 else // UnicodeLE
-                    utf16[i] = (s[2 + i * 2] & 0xff) | (s[3 + i * 2] & 0xff) >> 8;
+                    utf16[i] = (s[3 + i * 2] & 0xff) << 8 | (s[2 + i * 2] & 0xff);
             }
             len = UTF16toUCS4(utf16, len, &u);
             delete[] utf16;
diff --git a/qt5/tests/check_utf_conversion.cpp b/qt5/tests/check_utf_conversion.cpp
index 1f04c2a5..b153ae5b 100644
--- a/qt5/tests/check_utf_conversion.cpp
+++ b/qt5/tests/check_utf_conversion.cpp
@@ -43,7 +43,17 @@ static bool compare(const Unicode *a, const char *b, int len)
             return false;
     }
 
-    return *a == (Unicode)*b;
+    return true;
+}
+
+static bool compare(const Unicode *a, const uint16_t *b, int len)
+{
+    for (int i = 0; i < len; i++) {
+        if (a[i] != b[i])
+            return false;
+    }
+
+    return true;
 }
 
 void TestUTFConversion::testUTF_data()
@@ -147,32 +157,34 @@ void TestUTFConversion::testUnicodeToAscii7()
 
 void TestUTFConversion::testUnicodeLittleEndian()
 {
-    uint16_t UTF16LE_hi[4] { 0xFFFE, 0x4800, 0x4900, 0x2100 }; // UTF16-LE "HI!"
-    GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), 4 * 2);
+    uint16_t UTF16LE_hi[5] { 0xFFFE, 0x4800, 0x4900, 0x2100, 0x1126 }; // UTF16-LE "HI!☑"
+    GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), sizeof(UTF16LE_hi));
 
-    uint16_t UTF16BE_hi[4] { 0xFEFF, 0x0048, 0x0049, 0x0021 }; // UTF16-BE "HI!"
-    GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), 4 * 2);
+    uint16_t UTF16BE_hi[5] { 0xFEFF, 0x0048, 0x0049, 0x0021, 0x2611 }; // UTF16-BE "HI!☑"
+    GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), sizeof(UTF16BE_hi));
 
     // Let's assert both GooString's are different
-    Q_ASSERT(GooUTF16LE.cmp(&GooUTF16BE) != 0);
+    QVERIFY(GooUTF16LE.cmp(&GooUTF16BE));
 
     Unicode *UCS4fromLE, *UCS4fromBE;
     const int len1 = TextStringToUCS4(&GooUTF16LE, &UCS4fromLE);
     const int len2 = TextStringToUCS4(&GooUTF16BE, &UCS4fromBE);
 
-    // 3 as TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points
-    Q_ASSERT(len1 == len2);
-    Q_ASSERT(len1 == 3);
+    // len is 4 because TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points
+    QCOMPARE(len1, len2);
+    QCOMPARE(len1, 4);
 
     // Check that now after conversion, UCS4fromLE and UCS4fromBE are now the same
     for (int i = 0; i < len1; i++) {
-        Q_ASSERT(UCS4fromLE[i] == UCS4fromBE[i]);
+        QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]);
     }
 
+    const QString expected = QStringLiteral("HI!☑");
+
     // Do some final verifications, checking the strings to be "HI!"
     QVERIFY(*UCS4fromLE == *UCS4fromBE);
-    QVERIFY(compare(UCS4fromLE, "HI!", 3));
-    QVERIFY(compare(UCS4fromBE, "HI!", 3));
+    QVERIFY(compare(UCS4fromLE, expected.utf16(), len1));
+    QVERIFY(compare(UCS4fromBE, expected.utf16(), len1));
 }
 
 QTEST_GUILESS_MAIN(TestUTFConversion)
diff --git a/qt6/tests/check_utf_conversion.cpp b/qt6/tests/check_utf_conversion.cpp
index f28829f4..f2a66096 100644
--- a/qt6/tests/check_utf_conversion.cpp
+++ b/qt6/tests/check_utf_conversion.cpp
@@ -18,6 +18,7 @@ private slots:
     void testUTF_data();
     void testUTF();
     void testUnicodeToAscii7();
+    void testUnicodeLittleEndian();
 };
 
 static bool compare(const char *a, const char *b)
@@ -41,9 +42,18 @@ static bool compare(const Unicode *a, const char *b, int len)
             return false;
     }
 
-    return *a == (Unicode)*b;
+    return true;
 }
 
+static bool compare(const Unicode *a, const uint16_t *b, int len)
+{
+    for (int i = 0; i < len; i++) {
+        if (a[i] != b[i])
+            return false;
+    }
+
+    return true;
+}
 void TestUTFConversion::testUTF_data()
 {
     QTest::addColumn<QString>("s");
@@ -143,5 +153,37 @@ void TestUTFConversion::testUnicodeToAscii7()
     free(out_ascii_idx);
 }
 
+void TestUTFConversion::testUnicodeLittleEndian()
+{
+    uint16_t UTF16LE_hi[5] { 0xFFFE, 0x4800, 0x4900, 0x2100, 0x1126 }; // UTF16-LE "HI!☑"
+    GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), sizeof(UTF16LE_hi));
+
+    uint16_t UTF16BE_hi[5] { 0xFEFF, 0x0048, 0x0049, 0x0021, 0x2611 }; // UTF16-BE "HI!☑"
+    GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), sizeof(UTF16BE_hi));
+
+    // Let's assert both GooString's are different
+    QVERIFY(GooUTF16LE.cmp(&GooUTF16BE));
+
+    Unicode *UCS4fromLE, *UCS4fromBE;
+    const int len1 = TextStringToUCS4(&GooUTF16LE, &UCS4fromLE);
+    const int len2 = TextStringToUCS4(&GooUTF16BE, &UCS4fromBE);
+
+    // len is 4 because TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points
+    QCOMPARE(len1, len2);
+    QCOMPARE(len1, 4);
+
+    // Check that now after conversion, UCS4fromLE and UCS4fromBE are now the same
+    for (int i = 0; i < len1; i++) {
+        QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]);
+    }
+
+    const QString expected = QStringLiteral("HI!☑");
+
+    // Do some final verifications, checking the strings to be "HI!"
+    QVERIFY(*UCS4fromLE == *UCS4fromBE);
+    QVERIFY(compare(UCS4fromLE, expected.utf16(), len1));
+    QVERIFY(compare(UCS4fromBE, expected.utf16(), len1));
+}
+
 QTEST_GUILESS_MAIN(TestUTFConversion)
 #include "check_utf_conversion.moc"


More information about the poppler mailing list