[Libreoffice-commits] core.git: compilerplugins/clang lotuswordpro/source reportdesign/source starmath/qa starmath/source sw/qa

Stephan Bergmann sbergman at redhat.com
Thu Sep 28 06:27:33 UTC 2017


 compilerplugins/clang/stringconstant.cxx        |  303 +++++++++++++++++++-----
 compilerplugins/clang/test/stringconstant.cxx   |    6 
 lotuswordpro/source/filter/lwpnumericfmt.hxx    |   13 -
 reportdesign/source/core/api/FixedLine.cxx      |    3 
 starmath/qa/cppunit/test_nodetotextvisitors.cxx |    7 
 starmath/source/ElementsDockingWindow.cxx       |    2 
 sw/qa/extras/htmlimport/htmlimport.cxx          |    2 
 sw/qa/extras/ooxmlexport/ooxmlexport2.cxx       |    6 
 sw/qa/extras/ooxmlexport/ooxmlexport8.cxx       |    4 
 sw/qa/extras/rtfexport/rtfexport.cxx            |   27 +-
 sw/qa/extras/rtfexport/rtfexport2.cxx           |   32 +-
 sw/qa/extras/rtfimport/rtfimport.cxx            |    4 
 sw/qa/extras/uiwriter/uiwriter.cxx              |    2 
 13 files changed, 294 insertions(+), 117 deletions(-)

New commits:
commit c9f3277ea7bb22c395e8938168ce4df9101f7850
Author: Stephan Bergmann <sbergman at redhat.com>
Date:   Wed Sep 27 23:44:21 2017 +0200

    loplugin:stringconstant: Simplify construction of non-ASCII OUString
    
    Change-Id: If80c53978106789824e6154db396baeecc1969dd
    Reviewed-on: https://gerrit.libreoffice.org/42876
    Reviewed-by: Stephan Bergmann <sbergman at redhat.com>
    Tested-by: Stephan Bergmann <sbergman at redhat.com>

diff --git a/compilerplugins/clang/stringconstant.cxx b/compilerplugins/clang/stringconstant.cxx
index 31cccb9b2c4a..e4372a9e29b4 100644
--- a/compilerplugins/clang/stringconstant.cxx
+++ b/compilerplugins/clang/stringconstant.cxx
@@ -9,10 +9,14 @@
 
 #include <algorithm>
 #include <cassert>
+#include <cstdint>
 #include <cstdlib>
+#include <iomanip>
 #include <limits>
+#include <sstream>
 #include <stack>
 #include <string>
+#include <vector>
 #include <iostream>
 
 #include "check.hxx"
@@ -119,6 +123,8 @@ public:
     bool VisitCXXConstructExpr(CXXConstructExpr const * expr);
 
 private:
+    enum class ContentKind { Ascii, Utf8, Arbitrary };
+
     enum class TreatEmpty { DefaultCtor, CheckEmpty, Error };
 
     enum class ChangeKind { Char, CharLen, SingleChar, OUStringLiteral1 };
@@ -128,8 +134,9 @@ private:
     std::string describeChangeKind(ChangeKind kind);
 
     bool isStringConstant(
-        Expr const * expr, unsigned * size, bool * nonArray, bool * nonAscii,
-        bool * embeddedNuls, bool * terminatingNul);
+        Expr const * expr, unsigned * size, bool * nonArray,
+        ContentKind * content, bool * embeddedNuls, bool * terminatingNul,
+        std::vector<char32_t> * utf8Content = nullptr);
 
     bool isZero(Expr const * expr);
 
@@ -507,16 +514,16 @@ bool StringConstant::VisitCallExpr(CallExpr const * expr) {
     {
         unsigned n;
         bool nonArray;
-        bool non;
+        ContentKind cont;
         bool emb;
         bool trm;
         if (!isStringConstant(
-                expr->getArg(0)->IgnoreParenImpCasts(), &n, &nonArray, &non,
+                expr->getArg(0)->IgnoreParenImpCasts(), &n, &nonArray, &cont,
                 &emb, &trm))
         {
             return true;
         }
-        if (non) {
+        if (cont != ContentKind::Ascii) {
             report(
                 DiagnosticsEngine::Warning,
                 ("call of '%0' with string constant argument containing"
@@ -548,16 +555,16 @@ bool StringConstant::VisitCallExpr(CallExpr const * expr) {
         for (unsigned i = 0; i != 2; ++i) {
             unsigned n;
             bool nonArray;
-            bool non;
+            ContentKind cont;
             bool emb;
             bool trm;
             if (!isStringConstant(
-                    expr->getArg(i)->IgnoreParenImpCasts(), &n, &nonArray, &non,
-                    &emb, &trm))
+                    expr->getArg(i)->IgnoreParenImpCasts(), &n, &nonArray,
+                    &cont, &emb, &trm))
             {
                 continue;
             }
-            if (non) {
+            if (cont != ContentKind::Ascii) {
                 report(
                     DiagnosticsEngine::Warning,
                     ("call of '%0' with string constant argument containing"
@@ -593,16 +600,16 @@ bool StringConstant::VisitCallExpr(CallExpr const * expr) {
         for (unsigned i = 0; i != 2; ++i) {
             unsigned n;
             bool nonArray;
-            bool non;
+            ContentKind cont;
             bool emb;
             bool trm;
             if (!isStringConstant(
-                    expr->getArg(i)->IgnoreParenImpCasts(), &n, &nonArray, &non,
-                    &emb, &trm))
+                    expr->getArg(i)->IgnoreParenImpCasts(), &n, &nonArray,
+                    &cont, &emb, &trm))
             {
                 continue;
             }
-            if (non) {
+            if (cont != ContentKind::Ascii) {
                 report(
                     DiagnosticsEngine::Warning,
                     ("call of '%0' with string constant argument containing"
@@ -637,16 +644,16 @@ bool StringConstant::VisitCallExpr(CallExpr const * expr) {
     {
         unsigned n;
         bool nonArray;
-        bool non;
+        ContentKind cont;
         bool emb;
         bool trm;
         if (!isStringConstant(
-                expr->getArg(1)->IgnoreParenImpCasts(), &n, &nonArray, &non,
+                expr->getArg(1)->IgnoreParenImpCasts(), &n, &nonArray, &cont,
                 &emb, &trm))
         {
             return true;
         }
-        if (non) {
+        if (cont != ContentKind::Ascii) {
             report(
                 DiagnosticsEngine::Warning,
                 ("call of '%0' with string constant argument containing"
@@ -756,8 +763,6 @@ bool StringConstant::VisitCXXConstructExpr(CXXConstructExpr const * expr) {
         ChangeKind kind;
         PassThrough pass;
         bool simplify;
-        bool encIsAscii;
-        std::string enc;
         switch (expr->getConstructor()->getNumParams()) {
         case 1:
             if (!loplugin::TypeCheck(
@@ -783,11 +788,11 @@ bool StringConstant::VisitCXXConstructExpr(CXXConstructExpr const * expr) {
                 } else {
                     unsigned n;
                     bool nonArray;
-                    bool non;
+                    ContentKind cont;
                     bool emb;
                     bool trm;
                     if (!isStringConstant(
-                            arg->IgnoreParenImpCasts(), &n, &nonArray, &non,
+                            arg->IgnoreParenImpCasts(), &n, &nonArray, &cont,
                             &emb, &trm))
                     {
                         return true;
@@ -803,7 +808,7 @@ bool StringConstant::VisitCXXConstructExpr(CXXConstructExpr const * expr) {
                     {
                         return true;
                     }
-                    if (non) {
+                    if (cont != ContentKind::Ascii) {
                         report(
                             DiagnosticsEngine::Warning,
                             ("construction of %0 with string constant argument"
@@ -831,12 +836,13 @@ bool StringConstant::VisitCXXConstructExpr(CXXConstructExpr const * expr) {
             {
                 unsigned n;
                 bool nonArray;
-                bool non;
+                ContentKind cont;
                 bool emb;
                 bool trm;
+                std::vector<char32_t> utf8Cont;
                 if (!isStringConstant(
                         expr->getArg(0)->IgnoreParenImpCasts(), &n, &nonArray,
-                        &non, &emb, &trm))
+                        &cont, &emb, &trm, &utf8Cont))
                 {
                     return true;
                 }
@@ -855,20 +861,90 @@ bool StringConstant::VisitCXXConstructExpr(CXXConstructExpr const * expr) {
                         << n << res.toString(10) << expr->getSourceRange();
                     return true;
                 }
+                APSInt enc;
                 if (!expr->getArg(2)->EvaluateAsInt(
-                        res, compiler.getASTContext()))
+                        enc, compiler.getASTContext()))
                 {
                     return true;
                 }
-                encIsAscii = res == 11; // RTL_TEXTENCODING_ASCII_US
-                enc = res.toString(10);
+                auto const encIsAscii = enc == 11; // RTL_TEXTENCODING_ASCII_US
+                auto const encIsUtf8 = enc == 76; // RTL_TEXTENCODING_UTF8
                 if (!expr->getArg(3)->EvaluateAsInt(
                         res, compiler.getASTContext())
                     || res != 0x333) // OSTRING_TO_OUSTRING_CVTFLAGS
                 {
                     return true;
                 }
-                if (non || emb) {
+                if (!encIsAscii && cont == ContentKind::Ascii) {
+                    report(
+                        DiagnosticsEngine::Warning,
+                        ("suspicious 'rtl::OUString' constructor with text"
+                         " encoding %0 but plain ASCII content; use"
+                         " 'RTL_TEXTENCODING_ASCII_US' instead"),
+                        expr->getArg(2)->getExprLoc())
+                        << enc.toString(10) << expr->getSourceRange();
+                    return true;
+                }
+                if (encIsUtf8) {
+                    if (cont == ContentKind::Arbitrary) {
+                        report(
+                            DiagnosticsEngine::Warning,
+                            ("suspicious 'rtl::OUString' constructor with text"
+                             " encoding 'RTL_TEXTENCODING_UTF8' but non-UTF-8"
+                             " content"),
+                            expr->getArg(0)->getExprLoc())
+                            << expr->getSourceRange();
+                    } else {
+                        assert(cont == ContentKind::Utf8);
+                        //TODO: keep original content as much as possible
+                        std::ostringstream s;
+                        for (auto const c: utf8Cont) { // render each code point as it must appear inside u"..."
+                            if (c == '\\') {
+                                s << "\\\\";
+                            } else if (c == '"') {
+                                s << "\\\"";
+                            } else if (c == '\a') {
+                                s << "\\a";
+                            } else if (c == '\b') {
+                                s << "\\b";
+                            } else if (c == '\f') {
+                                s << "\\f";
+                            } else if (c == '\n') {
+                                s << "\\n";
+                            } else if (c == '\r') {
+                                s << "\\r";
+                            } else if (c == '\t') {
+                                s << "\\t"; // a tab must not be rendered as "\r"
+                            } else if (c == '\v') {
+                                s << "\\v";
+                            } else if (c <= 0x1F || c == 0x7F) { // remaining controls: fixed-width octal escape
+                                s << "\\" << std::oct << std::setw(3) // no 'x': the digits are octal, and "\x" would be greedy
+                                  << std::setfill('0')
+                                  << static_cast<std::uint_least32_t>(c);
+                            } else if (c < 0x7F) {
+                                s << char(c); // printable ASCII is emitted verbatim
+                            } else if (c <= 0xFFFF) {
+                                s << "\\u" << std::hex << std::uppercase
+                                  << std::setw(4) << std::setfill('0')
+                                  << static_cast<std::uint_least32_t>(c);
+                            } else {
+                                assert(c <= 0x10FFFF);
+                                s << "\\U" << std::hex << std::uppercase
+                                  << std::setw(8) << std::setfill('0')
+                                  << static_cast<std::uint_least32_t>(c);
+                            }
+                        }
+                        report(
+                            DiagnosticsEngine::Warning,
+                            ("simplify construction of %0 with UTF-8 content as"
+                             " OUString(u\"%1\")"),
+                            expr->getExprLoc())
+                            << classdecl << s.str() << expr->getSourceRange();
+
+                    }
+                    return true;
+                }
+                if (cont != ContentKind::Ascii || emb) {
                     // cf. remaining uses of RTL_CONSTASCII_USTRINGPARAM
                     return true;
                 }
@@ -1067,10 +1143,8 @@ bool StringConstant::VisitCXXConstructExpr(CXXConstructExpr const * expr) {
         if (simplify) {
             report(
                 DiagnosticsEngine::Warning,
-                ("simplify construction of %0 with %1%select{ (but beware, the"
-                 " given textencoding %3 is not RTL_TEXTENCODING_ASCII_US)|}2"),
-                expr->getExprLoc())
-                << classdecl << describeChangeKind(kind) << encIsAscii << enc
+                "simplify construction of %0 with %1", expr->getExprLoc())
+                << classdecl << describeChangeKind(kind)
                 << expr->getSourceRange();
         }
         return true;
@@ -1113,13 +1187,14 @@ std::string StringConstant::describeChangeKind(ChangeKind kind) {
 }
 
 bool StringConstant::isStringConstant(
-    Expr const * expr, unsigned * size, bool * nonArray, bool * nonAscii,
-    bool * embeddedNuls, bool * terminatingNul)
+    Expr const * expr, unsigned * size, bool * nonArray, ContentKind * content,
+    bool * embeddedNuls, bool * terminatingNul,
+    std::vector<char32_t> * utf8Content)
 {
     assert(expr != nullptr);
     assert(size != nullptr);
     assert(nonArray != nullptr);
-    assert(nonAscii != nullptr);
+    assert(content != nullptr);
     assert(embeddedNuls != nullptr);
     assert(terminatingNul != nullptr);
     QualType t = expr->getType();
@@ -1167,19 +1242,124 @@ bool StringConstant::isStringConstant(
             return false;
         }
         unsigned n = lit->getLength();
-        bool non = false;
+        ContentKind cont = ContentKind::Ascii;
         bool emb = false;
+        char32_t val = 0; // code point being assembled from the current multi-byte sequence
+        enum class Utf8State { Start, E0, ED, F0, F4, Trail1, Trail2, Trail3 };
+        Utf8State s = Utf8State::Start; // TrailN: N plain continuation bytes (80..BF) still expected
         StringRef str = lit->getString();
         for (unsigned i = 0; i != n; ++i) {
-            if (str[i] == '\0') {
+            auto const c = static_cast<unsigned char>(str[i]);
+            if (c == '\0') {
                 emb = true;
-            } else if (static_cast<unsigned char>(str[i]) >= 0x80) {
-                non = true;
+            }
+            switch (s) {
+            case Utf8State::Start: // expecting an ASCII byte or a lead byte
+                if (c >= 0x80) {
+                    if (c >= 0xC2 && c <= 0xDF) { // two-byte lead (C0/C1 would be overlong)
+                        val = c & 0x1F;
+                        s = Utf8State::Trail1;
+                    } else if (c == 0xE0) {
+                        val = c & 0x0F;
+                        s = Utf8State::E0;
+                    } else if ((c >= 0xE1 && c <= 0xEC)
+                               || (c >= 0xEE && c <= 0xEF))
+                    {
+                        val = c & 0x0F;
+                        s = Utf8State::Trail2;
+                    } else if (c == 0xED) { // ED needs a restricted second byte to exclude surrogates
+                        val = c & 0x0F;
+                        s = Utf8State::ED;
+                    } else if (c == 0xF0) {
+                        val = c & 0x07; // four-byte leads carry three payload bits
+                        s = Utf8State::F0;
+                    } else if (c >= 0xF1 && c <= 0xF3) {
+                        val = c & 0x07;
+                        s = Utf8State::Trail3;
+                    } else if (c == 0xF4) {
+                        val = c & 0x07; // 0x03 would drop the bit that selects plane 16
+                        s = Utf8State::F4;
+                    } else {
+                        cont = ContentKind::Arbitrary;
+                    }
+                } else if (utf8Content != nullptr
+                           && cont != ContentKind::Arbitrary)
+                {
+                    utf8Content->push_back(c);
+                }
+                break;
+            case Utf8State::E0: // second byte A0..BF only (80..9F would be overlong)
+                if (c >= 0xA0 && c <= 0xBF) {
+                    val = (val << 6) | (c & 0x3F);
+                    s = Utf8State::Trail1;
+                } else {
+                    cont = ContentKind::Arbitrary;
+                    s = Utf8State::Start;
+                }
+                break;
+            case Utf8State::ED: // second byte 80..9F only (A0..BF would encode U+D800..U+DFFF)
+                if (c >= 0x80 && c <= 0x9F) {
+                    val = (val << 6) | (c & 0x3F);
+                    s = Utf8State::Trail1;
+                } else {
+                    cont = ContentKind::Arbitrary;
+                    s = Utf8State::Start;
+                }
+                break;
+            case Utf8State::F0: // second byte 90..BF only (80..8F would be overlong)
+                if (c >= 0x90 && c <= 0xBF) {
+                    val = (val << 6) | (c & 0x3F);
+                    s = Utf8State::Trail2;
+                } else {
+                    cont = ContentKind::Arbitrary;
+                    s = Utf8State::Start;
+                }
+                break;
+            case Utf8State::F4: // second byte 80..8F only (90..BF would exceed U+10FFFF)
+                if (c >= 0x80 && c <= 0x8F) {
+                    val = (val << 6) | (c & 0x3F);
+                    s = Utf8State::Trail2;
+                } else {
+                    cont = ContentKind::Arbitrary;
+                    s = Utf8State::Start;
+                }
+                break;
+            case Utf8State::Trail1: // final continuation byte completes the code point
+                if (c >= 0x80 && c <= 0xBF) {
+                    if (cont != ContentKind::Arbitrary) { // a valid sequence must not undo an earlier Arbitrary verdict
+                        cont = ContentKind::Utf8;
+                    }
+                    if (utf8Content != nullptr) {
+                        utf8Content->push_back((val << 6) | (c & 0x3F));
+                    }
+                } else {
+                    cont = ContentKind::Arbitrary;
+                }
+                s = Utf8State::Start;
+                break;
+            case Utf8State::Trail2:
+                if (c >= 0x80 && c <= 0xBF) {
+                    val = (val << 6) | (c & 0x3F);
+                    s = Utf8State::Trail1;
+                } else {
+                    cont = ContentKind::Arbitrary;
+                    s = Utf8State::Start;
+                }
+                break;
+            case Utf8State::Trail3:
+                if (c >= 0x80 && c <= 0xBF) {
+                    val = (val << 6) | (c & 0x3F);
+                    s = Utf8State::Trail2;
+                } else {
+                    cont = ContentKind::Arbitrary;
+                    s = Utf8State::Start;
+                }
+                break;
             }
         }
         *size = n;
         *nonArray = isPtr;
-        *nonAscii = non;
+        *content = s == Utf8State::Start ? cont : ContentKind::Arbitrary; // literal ending mid-sequence is not UTF-8
         *embeddedNuls = emb;
         *terminatingNul = true;
         return true;
@@ -1201,7 +1381,7 @@ bool StringConstant::isStringConstant(
             Expr const * e2 = e->IgnoreParenImpCasts();
             if (e2 != e) {
                 return isStringConstant(
-                    e2, size, nonArray, nonAscii, embeddedNuls, terminatingNul);
+                    e2, size, nonArray, content, embeddedNuls, terminatingNul, utf8Content); // forward the code-point sink so a Utf8 verdict comes with its decoded content
             }
             //TODO: string literals are represented as recursive LValues???
             llvm::APInt n
@@ -1211,7 +1391,7 @@ bool StringConstant::isStringConstant(
             assert(n.ule(std::numeric_limits<unsigned>::max()));
             *size = static_cast<unsigned>(n.getLimitedValue());
             *nonArray = isPtr || *nonArray;
-            *nonAscii = false; //TODO
+            *content = ContentKind::Ascii; //TODO
             *embeddedNuls = false; //TODO
             *terminatingNul = true;
             return true;
@@ -1223,8 +1403,9 @@ bool StringConstant::isStringConstant(
             }
             unsigned n = v.getArraySize();
             assert(n != 0);
-            bool non = false;
+            ContentKind cont = ContentKind::Ascii;
             bool emb = false;
+            //TODO: check for ContentKind::Utf8
             for (unsigned i = 0; i != n - 1; ++i) {
                 APValue e(v.getArrayInitializedElt(i));
                 if (!e.isInt()) { //TODO: assert?
@@ -1234,7 +1415,7 @@ bool StringConstant::isStringConstant(
                 if (iv == 0) {
                     emb = true;
                 } else if (iv.uge(0x80)) {
-                    non = true;
+                    cont = ContentKind::Arbitrary;
                 }
             }
             APValue e(v.getArrayInitializedElt(n - 1));
@@ -1244,7 +1425,7 @@ bool StringConstant::isStringConstant(
             bool trm = e.getInt() == 0;
             *size = trm ? n - 1 : n;
             *nonArray = isPtr;
-            *nonAscii = non;
+            *content = cont;
             *embeddedNuls = emb;
             *terminatingNul = trm;
             return true;
@@ -1477,16 +1658,16 @@ void StringConstant::handleChar(
 {
     unsigned n;
     bool nonArray;
-    bool non;
+    ContentKind cont;
     bool emb;
     bool trm;
     if (!isStringConstant(
-            expr->getArg(arg)->IgnoreParenImpCasts(), &n, &nonArray, &non, &emb,
-            &trm))
+            expr->getArg(arg)->IgnoreParenImpCasts(), &n, &nonArray, &cont,
+            &emb, &trm))
     {
         return;
     }
-    if (non) {
+    if (cont != ContentKind::Ascii) {
         report(
             DiagnosticsEngine::Warning,
             ("call of '%0' with string constant argument containing non-ASCII"
@@ -1537,11 +1718,11 @@ void StringConstant::handleCharLen(
     // out how to do that yet anyway):
     unsigned n;
     bool nonArray;
-    bool non;
+    ContentKind cont;
     bool emb;
     bool trm;
     if (!(isStringConstant(
-              expr->getArg(arg1)->IgnoreParenImpCasts(), &n, &nonArray, &non,
+              expr->getArg(arg1)->IgnoreParenImpCasts(), &n, &nonArray, &cont,
               &emb, &trm)
           && trm))
     {
@@ -1565,13 +1746,13 @@ void StringConstant::handleCharLen(
         }
         unsigned n2;
         bool nonArray2;
-        bool non2;
+        ContentKind cont2;
         bool emb2;
         bool trm2;
         if (!(isStringConstant(
                   subs->getBase()->IgnoreParenImpCasts(), &n2, &nonArray2,
-                  &non2, &emb2, &trm2)
-              && n2 == n && non2 == non && emb2 == emb && trm2 == trm
+                  &cont2, &emb2, &trm2)
+              && n2 == n && cont2 == cont && emb2 == emb && trm2 == trm
                   //TODO: same strings
               && subs->getIdx()->EvaluateAsInt(res, compiler.getASTContext())
               && res == 0))
@@ -1579,7 +1760,7 @@ void StringConstant::handleCharLen(
             return;
         }
     }
-    if (non) {
+    if (cont != ContentKind::Ascii) {
         report(
             DiagnosticsEngine::Warning,
             ("call of '%0' with string constant argument containing non-ASCII"
@@ -1663,16 +1844,16 @@ void StringConstant::handleOUStringCtor(
     }
     unsigned n;
     bool nonArray;
-    bool non;
+    ContentKind cont;
     bool emb;
     bool trm;
     if (!isStringConstant(
-            e3->getArg(0)->IgnoreParenImpCasts(), &n, &nonArray, &non, &emb,
+            e3->getArg(0)->IgnoreParenImpCasts(), &n, &nonArray, &cont, &emb,
             &trm))
     {
         return;
     }
-    //TODO: non, emb, trm
+    //TODO: cont, emb, trm
     if (rewriter != nullptr) {
         auto loc1 = e3->getLocStart();
         auto range = e3->getParenOrBraceRange();
@@ -1753,11 +1934,11 @@ void StringConstant::handleFunArgOstring(
     auto argExpr = expr->getArg(arg)->IgnoreParenImpCasts();
     unsigned n;
     bool nonArray;
-    bool non;
+    ContentKind cont;
     bool emb;
     bool trm;
-    if (isStringConstant(argExpr, &n, &nonArray, &non, &emb, &trm)) {
-        if (non || emb) {
+    if (isStringConstant(argExpr, &n, &nonArray, &cont, &emb, &trm)) {
+        if (cont != ContentKind::Ascii || emb) {
             return;
         }
         if (!trm) {
@@ -1797,7 +1978,7 @@ void StringConstant::handleFunArgOstring(
             case 2:
                 if (isStringConstant(
                         cexpr->getArg(0)->IgnoreParenImpCasts(), &n, &nonArray,
-                        &non, &emb, &trm))
+                        &cont, &emb, &trm))
                 {
                     APSInt res;
                     if (cexpr->getArg(1)->EvaluateAsInt(
diff --git a/compilerplugins/clang/test/stringconstant.cxx b/compilerplugins/clang/test/stringconstant.cxx
index 8a830f12c717..ee79c5738ac9 100644
--- a/compilerplugins/clang/test/stringconstant.cxx
+++ b/compilerplugins/clang/test/stringconstant.cxx
@@ -62,12 +62,16 @@ int main() {
     (void)aFoo2;
 
     (void) OUString("xxx", 3, RTL_TEXTENCODING_ASCII_US); // expected-error {{simplify construction of 'OUString' with string constant argument [loplugin:stringconstant]}}
-    (void) OUString("xxx", 3, RTL_TEXTENCODING_ISO_8859_1); // expected-error {{simplify construction of 'OUString' with string constant argument (but beware, the given textencoding 12 is not RTL_TEXTENCODING_ASCII_US) [loplugin:stringconstant]}}
+    (void) OUString("xxx", 3, RTL_TEXTENCODING_ISO_8859_1); // expected-error {{suspicious 'rtl::OUString' constructor with text encoding 12 but plain ASCII content; use 'RTL_TEXTENCODING_ASCII_US' instead [loplugin:stringconstant]}}
     (void) OUString("x\xA0x", 3, RTL_TEXTENCODING_ISO_8859_1);
 
     (void) OUString("xxx", 2, RTL_TEXTENCODING_ASCII_US); // expected-error {{suspicious 'rtl::OUString' constructor with literal of length 3 and non-matching length argument 2 [loplugin:stringconstant]}}
 
     (void) OUString(u8"xxx", 3, RTL_TEXTENCODING_ASCII_US); // expected-error {{simplify construction of 'OUString' with string constant argument [loplugin:stringconstant]}}
+
+    (void) OUString("\x80", 1, RTL_TEXTENCODING_UTF8); // expected-error {{suspicious 'rtl::OUString' constructor with text encoding 'RTL_TEXTENCODING_UTF8' but non-UTF-8 content [loplugin:stringconstant]}}
+
+    (void) OUString("\xC2\x80", 2, RTL_TEXTENCODING_UTF8); // expected-error {{simplify construction of 'OUString' with UTF-8 content as OUString(u"\u0080") [loplugin:stringconstant]}}
 }
 
 
diff --git a/lotuswordpro/source/filter/lwpnumericfmt.hxx b/lotuswordpro/source/filter/lwpnumericfmt.hxx
index a0783ec0dd19..592a93541c54 100644
--- a/lotuswordpro/source/filter/lwpnumericfmt.hxx
+++ b/lotuswordpro/source/filter/lwpnumericfmt.hxx
@@ -180,9 +180,6 @@ enum
     FMT_EURO                = 52
 };
 
-#define RTL_CONSTUTF8_USTRINGPARAM( constAsciiStr ) (&(constAsciiStr)[0]), \
-    ((sal_Int32)(SAL_N_ELEMENTS(constAsciiStr)-1)), RTL_TEXTENCODING_UTF8
-
 class LwpCurrencyPool
 {
 public:
@@ -201,9 +198,9 @@ private:
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("oS",true, true);//FMT_AUSTRIANSCHILLING = 3,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("BF",true, true);//FMT_BELGIANFRANC      = 4,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("R$",false, true);//FMT_BRAZILIANCRUZEIRO    = 5,
-        m_aCurrencyInfo[nC++]=LwpCurrencyInfo(OUString(RTL_CONSTUTF8_USTRINGPARAM("\357\277\241")));                 //FMT_BRITISHPOUND      = 6,
+        m_aCurrencyInfo[nC++]=LwpCurrencyInfo(OUString(u"\uFFE1"));                 //FMT_BRITISHPOUND      = 6,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("C$");                   //FMT_CANADIANDOLLAR        = 7,
-        m_aCurrencyInfo[nC++]=LwpCurrencyInfo(OUString(RTL_CONSTUTF8_USTRINGPARAM("PRC\357\277\245")),false,true);   //FMT_CHINESEYUAN           = 8,
+        m_aCurrencyInfo[nC++]=LwpCurrencyInfo(OUString(u"PRC\uFFE5"),false,true);   //FMT_CHINESEYUAN           = 8,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("Kc",true, true);//FMT_CZECHKORUNA           = 9,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("Dkr",false, true);//FMT_DANISHKRONE         = 10,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("ECU",true, true);//FMT_ECU                  = 11,
@@ -215,9 +212,9 @@ private:
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("Ft",true, true);//FMT_HUNGARIANFORINT       = 17,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("Rs",false, true);//FMT_INDIANRUPEE          = 18,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("Rp",false, true);//FMT_INDONESIANRUPIAH = 19,
-        m_aCurrencyInfo[nC++]=LwpCurrencyInfo(OUString(RTL_CONSTUTF8_USTRINGPARAM("IR\357\277\241")));                   //FMT_IRISHPUNT         = 20,
+        m_aCurrencyInfo[nC++]=LwpCurrencyInfo(OUString(u"IR\uFFE1"));                   //FMT_IRISHPUNT         = 20,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("L.",false, true);//FMT_ITALIANLIRA          = 21,
-        m_aCurrencyInfo[nC++]=LwpCurrencyInfo(OUString(RTL_CONSTUTF8_USTRINGPARAM("\357\277\245")));             //FMT_JAPANESEYEN           = 22,
+        m_aCurrencyInfo[nC++]=LwpCurrencyInfo(OUString(u"\uFFE5"));             //FMT_JAPANESEYEN           = 22,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("LF",true, true);//FMT_LUXEMBOURGFRANC       = 23,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("Rm",false, true);//FMT_MALAYSIANRINGGIT = 24,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("Mex$");                 //FMT_MEXICANPESO           = 25,
@@ -241,7 +238,7 @@ private:
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("$");                    //FMT_USDOLLAR          = 43,
         m_aCurrencyInfo[nC++]=LwpCurrencyInfo("OTH",false, true);//FMT_OTHERCURRENCY       = 44,
 
-        m_aCurrencyInfo[FMT_EURO]=LwpCurrencyInfo(OUString(RTL_CONSTUTF8_USTRINGPARAM("\342\202\254")));             //FMT_EURO              = 52
+        m_aCurrencyInfo[FMT_EURO]=LwpCurrencyInfo(OUString(u"\u20AC"));             //FMT_EURO              = 52
     }
 };
 
diff --git a/reportdesign/source/core/api/FixedLine.cxx b/reportdesign/source/core/api/FixedLine.cxx
index be9c96ccdd64..3b79e32d785f 100644
--- a/reportdesign/source/core/api/FixedLine.cxx
+++ b/reportdesign/source/core/api/FixedLine.cxx
@@ -510,8 +510,7 @@ awt::Size SAL_CALL OFixedLine::getSize(  )
 
 void SAL_CALL OFixedLine::setSize( const awt::Size& aSize )
 {
-    const char hundredthmmC[] = "0\xe2\x80\x89\xC2\xB5""m"; // in UTF-8: 0, thin space, µ (micro), m (meter)
-    const OUString hundredthmm(hundredthmmC, sizeof(hundredthmmC)-1, RTL_TEXTENCODING_UTF8);
+    const OUString hundredthmm(u"0\u2009\u00B5m"); // 0, thin space, µ (micro), m (meter)
     if ( aSize.Width < MIN_WIDTH && m_nOrientation == 1 )
         throw beans::PropertyVetoException("Too small width for FixedLine; minimum is "  + OUString::number(MIN_WIDTH)  + hundredthmm, static_cast<cppu::OWeakObject*>(this));
     else if ( aSize.Height < MIN_HEIGHT && m_nOrientation == 0 )
diff --git a/starmath/qa/cppunit/test_nodetotextvisitors.cxx b/starmath/qa/cppunit/test_nodetotextvisitors.cxx
index aec16ea30738..dee561d371aa 100644
--- a/starmath/qa/cppunit/test_nodetotextvisitors.cxx
+++ b/starmath/qa/cppunit/test_nodetotextvisitors.cxx
@@ -667,12 +667,9 @@ void Test::testMiscEquivalent()
 
 void Test::testParser()
 {
-    char const* const formula = "{ \xf0\x9d\x91\x8e }"; // non-BMP Unicode
-    char const* const expected = "\xf0\x9d\x91\x8e";
-
     OUString sOutput;
-    OUString sInput = OUString(formula, strlen(formula), RTL_TEXTENCODING_UTF8);
-    OUString sExpected = OUString(expected, strlen(expected), RTL_TEXTENCODING_UTF8);
+    OUString sInput(u"{ \U0001D44E }"); // non-BMP Unicode
+    OUString sExpected(u"\U0001D44E");
     std::unique_ptr<SmNode> pNode(SmParser().ParseExpression(sInput));
     pNode->Prepare(xDocShRef->GetFormat(), *xDocShRef);
     SmNodeToTextVisitor(pNode.get(), sOutput);
diff --git a/starmath/source/ElementsDockingWindow.cxx b/starmath/source/ElementsDockingWindow.cxx
index 7adf4ce5fd92..1a8f93119d5d 100644
--- a/starmath/source/ElementsDockingWindow.cxx
+++ b/starmath/source/ElementsDockingWindow.cxx
@@ -559,7 +559,7 @@ void SmElementsControl::addElements(const std::pair<const char*, const char*> aE
         } else {
             OUString aElement(OUString::createFromAscii(pElement));
             if (aElement == RID_NEWLINE)
-                addElement(OUString( "\xe2\x86\xb5", 3, RTL_TEXTENCODING_UTF8 ), aElement, SmResId(pElementHelp));
+                addElement(OUString(u"\u21B5"), aElement, SmResId(pElementHelp));
             else if (aElement == RID_SBLANK)
                 addElement("\"`\"", aElement, SmResId(pElementHelp));
             else if (aElement == RID_BLANK)
diff --git a/sw/qa/extras/htmlimport/htmlimport.cxx b/sw/qa/extras/htmlimport/htmlimport.cxx
index 5e704b3f3be9..9b20423ea945 100644
--- a/sw/qa/extras/htmlimport/htmlimport.cxx
+++ b/sw/qa/extras/htmlimport/htmlimport.cxx
@@ -146,7 +146,7 @@ DECLARE_HTMLIMPORT_TEST(testListStyleType, "list-style.html")
         if (rProp.Name == "BulletChar")
         {
             // should be 'o'.
-            CPPUNIT_ASSERT_EQUAL(OUString("\xEE\x80\x89", 3, RTL_TEXTENCODING_UTF8), rProp.Value.get<OUString>());
+            CPPUNIT_ASSERT_EQUAL(OUString(u"\uE009"), rProp.Value.get<OUString>());
             bBulletFound = true;
             break;
         }
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
index b5e63cf65a86..1027750aba3f 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
@@ -224,8 +224,8 @@ DECLARE_OOXMLEXPORT_TEST(testFdo51034, "fdo51034.odt")
 // Replace symbol - (i.e. U+2212) with ASCII - , LO does this change and it shouldn't matter.
 void CHECK_FORMULA(OUString const & expected, OUString const & actual) {
     CPPUNIT_ASSERT_EQUAL(
-        expected.replaceAll( " ", "" ).replaceAll( OUString( "\xe2\x88\x92", strlen( "\xe2\x88\x92" ), RTL_TEXTENCODING_UTF8 ), "-" ),
-        actual.replaceAll( " ", "" ).replaceAll( OUString( "\xe2\x88\x92", strlen( "\xe2\x88\x92" ), RTL_TEXTENCODING_UTF8 ), "-" ));
+        expected.replaceAll( " ", "" ).replaceAll( OUString(u"\u2212"), "-" ),
+        actual.replaceAll( " ", "" ).replaceAll( OUString(u"\u2212"), "-" ));
 }
 
 DECLARE_OOXMLEXPORT_TEST(testMathAccents, "math-accents.docx")
@@ -942,7 +942,7 @@ DECLARE_OOXMLEXPORT_TEST(testFdo66781, "fdo66781.docx")
         const beans::PropertyValue& rProp = aProps[i];
         if (rProp.Name == "BulletChar")
         {
-            CPPUNIT_ASSERT_EQUAL(OUString("\x0", 1, RTL_TEXTENCODING_UTF8), rProp.Value.get<OUString>());
+            CPPUNIT_ASSERT_EQUAL(OUString("\x0", 1, RTL_TEXTENCODING_ASCII_US), rProp.Value.get<OUString>());
             return;
         }
     }
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport8.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport8.cxx
index c59fb365146f..03fd2fbc7103 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport8.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport8.cxx
@@ -549,7 +549,7 @@ DECLARE_OOXMLEXPORT_TEST(testFdo74357, "fdo74357.docx")
 DECLARE_OOXMLEXPORT_TEST(testFdo55187, "fdo55187.docx")
 {
     // 0x010d was imported as a newline.
-    getParagraph(1, OUString("lup\xc4\x8dka", 7, RTL_TEXTENCODING_UTF8));
+    getParagraph(1, OUString(u"lup\u010Dka"));
 }
 
 DECLARE_OOXMLEXPORT_TEST(testN780563, "n780563.docx")
@@ -819,7 +819,7 @@ DECLARE_OOXMLEXPORT_TEST(testFdo59638, "fdo59638.docx")
         if (rProp.Name == "BulletChar")
         {
             // Was '*', should be 'o'.
-            CPPUNIT_ASSERT_EQUAL(OUString("\xEF\x82\xB7", 3, RTL_TEXTENCODING_UTF8), rProp.Value.get<OUString>());
+            CPPUNIT_ASSERT_EQUAL(OUString(u"\uF0B7"), rProp.Value.get<OUString>());
             return;
         }
     }
diff --git a/sw/qa/extras/rtfexport/rtfexport.cxx b/sw/qa/extras/rtfexport/rtfexport.cxx
index 6e0960f39116..004535694da7 100644
--- a/sw/qa/extras/rtfexport/rtfexport.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport.cxx
@@ -230,14 +230,14 @@ DECLARE_RTFEXPORT_TEST(testMathD, "math-d.rtf")
 DECLARE_RTFEXPORT_TEST(testMathEscaping, "math-escaping.rtf")
 {
     OUString aActual = getFormula(getRun(getParagraph(1), 1));
-    OUString aExpected("\xc3\xa1 \\{", 5, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u00E1 \\{");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 }
 
 DECLARE_RTFEXPORT_TEST(testMathLim, "math-lim.rtf")
 {
     OUString aActual = getFormula(getRun(getParagraph(1), 1));
-    OUString aExpected("lim from {x \xe2\x86\x92 1} {x}", 22, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"lim from {x \u2192 1} {x}");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 }
 
@@ -258,20 +258,19 @@ DECLARE_RTFEXPORT_TEST(testMathBox, "math-mbox.rtf")
 DECLARE_RTFEXPORT_TEST(testMathMso2007, "math-mso2007.rtf")
 {
     OUString aActual = getFormula(getRun(getParagraph(1), 1));
-    OUString aExpected("A = \xcf\x80 {r} ^ {2}", 16, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"A = \u03C0 {r} ^ {2}");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     aActual = getFormula(getRun(getParagraph(2), 1));
-    aExpected = OUString("{left (x + a right )} ^ {n} = sum from {k = 0} to {n} {left (stack { n # k } right ) {x} ^ {k} {a} ^ {n \xe2\x88\x92 k}}", 111, RTL_TEXTENCODING_UTF8);
+    aExpected = OUString(u"{left (x + a right )} ^ {n} = sum from {k = 0} to {n} {left (stack { n # k } right ) {x} ^ {k} {a} ^ {n \u2212 k}}");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     aActual = getFormula(getRun(getParagraph(3), 1));
-    aExpected = OUString("{left (1 + x right )} ^ {n} = 1 + {nx} over {1 !} + {n left (n \xe2\x88\x92 1 right ) {x} ^ {2}} over {2 !} + \xe2\x80\xa6", 104, RTL_TEXTENCODING_UTF8);
+    aExpected = OUString(u"{left (1 + x right )} ^ {n} = 1 + {nx} over {1 !} + {n left (n \u2212 1 right ) {x} ^ {2}} over {2 !} + \u2026");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     aActual = getFormula(getRun(getParagraph(4), 1));
-    aExpected = OUString("f left (x right ) = {a} rsub {0} + sum from {n = 1} to {\xe2\x88\x9e} {left ({a} rsub {n} cos {n\xcf\x80x} over {L} + {b} rsub {n} sin {n\xcf\x80x} over {L} right )}", 144,
-                         RTL_TEXTENCODING_UTF8);
+    aExpected = OUString(u"f left (x right ) = {a} rsub {0} + sum from {n = 1} to {\u221E} {left ({a} rsub {n} cos {n\u03C0x} over {L} + {b} rsub {n} sin {n\u03C0x} over {L} right )}");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     aActual = getFormula(getRun(getParagraph(5), 1));
@@ -279,19 +278,19 @@ DECLARE_RTFEXPORT_TEST(testMathMso2007, "math-mso2007.rtf")
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     aActual = getFormula(getRun(getParagraph(6), 1));
-    aExpected = OUString("x = {\xe2\x88\x92 b \xc2\xb1 sqrt {{b} ^ {2} \xe2\x88\x92 4 ac}} over {2 a}", 51, RTL_TEXTENCODING_UTF8);
+    aExpected = OUString(u"x = {\u2212 b \u00B1 sqrt {{b} ^ {2} \u2212 4 ac}} over {2 a}");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     aActual = getFormula(getRun(getParagraph(7), 1));
-    aExpected = OUString("{e} ^ {x} = 1 + {x} over {1 !} + {{x} ^ {2}} over {2 !} + {{x} ^ {3}} over {3 !} + \xe2\x80\xa6 , \xe2\x88\x92 \xe2\x88\x9e < x < \xe2\x88\x9e", 106, RTL_TEXTENCODING_UTF8);
+    aExpected = OUString(u"{e} ^ {x} = 1 + {x} over {1 !} + {{x} ^ {2}} over {2 !} + {{x} ^ {3}} over {3 !} + \u2026 , \u2212 \u221E < x < \u221E");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     aActual = getFormula(getRun(getParagraph(8), 1));
-    aExpected = OUString("sin \xce\xb1 \xc2\xb1 sin \xce\xb2 = 2 sin {1} over {2} left (\xce\xb1 \xc2\xb1 \xce\xb2 right ) cos {1} over {2} left (\xce\xb1 \xe2\x88\x93 \xce\xb2 right )", 101, RTL_TEXTENCODING_UTF8);
+    aExpected = OUString(u"sin \u03B1 \u00B1 sin \u03B2 = 2 sin {1} over {2} left (\u03B1 \u00B1 \u03B2 right ) cos {1} over {2} left (\u03B1 \u2213 \u03B2 right )");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     aActual = getFormula(getRun(getParagraph(9), 1));
-    aExpected = OUString("cos \xce\xb1 + cos \xce\xb2 = 2 cos {1} over {2} left (\xce\xb1 + \xce\xb2 right ) cos {1} over {2} left (\xce\xb1 \xe2\x88\x92 \xce\xb2 right )", 99, RTL_TEXTENCODING_UTF8);
+    aExpected = OUString(u"cos \u03B1 + cos \u03B2 = 2 cos {1} over {2} left (\u03B1 + \u03B2 right ) cos {1} over {2} left (\u03B1 \u2212 \u03B2 right )");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 }
 
@@ -375,7 +374,7 @@ DECLARE_RTFEXPORT_TEST(testMathRuns, "math-runs.rtf")
 DECLARE_RTFEXPORT_TEST(testFdo77979, "fdo77979.odt")
 {
     // font name is encoded with \fcharset of font
-    OUString aExpected("\xE5\xBE\xAE\xE8\xBD\xAF\xE9\x9B\x85\xE9\xBB\x91", 12, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u5FAE\u8F6F\u96C5\u9ED1");
     CPPUNIT_ASSERT_EQUAL(aExpected, getProperty<OUString>(getRun(getParagraph(1), 1), "CharFontName"));
 }
 
@@ -456,7 +455,7 @@ DECLARE_RTFEXPORT_TEST(testFdo61507, "fdo61507.rtf")
 
     uno::Reference<document::XDocumentPropertiesSupplier> xDocumentPropertiesSupplier(mxComponent, uno::UNO_QUERY);
     uno::Reference<document::XDocumentProperties> xDocumentProperties(xDocumentPropertiesSupplier->getDocumentProperties());
-    OUString aExpected = OUString("\xc3\x89\xc3\x81\xc5\x90\xc5\xb0\xe2\x88\xad", 11, RTL_TEXTENCODING_UTF8);
+    OUString aExpected = OUString(u"\u00C9\u00C1\u0150\u0170\u222D");
     CPPUNIT_ASSERT_EQUAL(aExpected, xDocumentProperties->getTitle());
 
     // Only "Hello.", no additional characters.
@@ -484,7 +483,7 @@ DECLARE_RTFEXPORT_TEST(testMnor, "mnor.rtf")
 {
     // \mnor wasn't handled, leading to missing quotes around "divF" and so on.
     OUString aActual = getFormula(getRun(getParagraph(1), 1));
-    OUString aExpected("iiint from {V} to <?> {\"divF\"} dV = llint from {S} to <?> {\"F\" \xe2\x88\x99 \"n\" dS}", 74, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"iiint from {V} to <?> {\"divF\"} dV = llint from {S} to <?> {\"F\" \u2219 \"n\" dS}");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 }
 
diff --git a/sw/qa/extras/rtfexport/rtfexport2.cxx b/sw/qa/extras/rtfexport/rtfexport2.cxx
index 85e101087444..c81a2cff3615 100644
--- a/sw/qa/extras/rtfexport/rtfexport2.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport2.cxx
@@ -238,7 +238,7 @@ DECLARE_RTFEXPORT_TEST(testFdo79384, "fdo79384.rtf")
 {
     uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
 
-    CPPUNIT_ASSERT_EQUAL(OUString("Маркеры спискамЫ", 31, RTL_TEXTENCODING_UTF8), xTextRange->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString(u"Маркеры спискамЫ"), xTextRange->getString());
 }
 
 DECLARE_RTFEXPORT_TEST(testFdo47326, "fdo47326.rtf")
@@ -279,7 +279,7 @@ DECLARE_RTFEXPORT_TEST(testFdo45394, "fdo45394.rtf")
     uno::Reference<text::XText> xHeaderText = getProperty< uno::Reference<text::XText> >(getStyles("PageStyles")->getByName("Standard"), "HeaderText");
     OUString aActual = xHeaderText->getString();
     // Encoding in the header was wrong.
-    OUString aExpected("\xd0\x9f\xd0\x9a \xd0\xa0\xd0\x98\xd0\x9a", 11, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u041F\u041A \u0420\u0418\u041A");
     CPPUNIT_ASSERT_EQUAL(aExpected, aActual);
 
     uno::Reference<text::XTextTablesSupplier> xTextTablesSupplier(mxComponent, uno::UNO_QUERY);
@@ -344,7 +344,7 @@ DECLARE_RTFEXPORT_TEST(testFdo48023, "fdo48023.rtf")
     uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
 
     // Implicit encoding detection based on locale was missing
-    OUString aExpected("\xd0\x9f\xd1\x80\xd0\xbe\xd0\xb3\xd1\x80\xd0\xb0\xd0\xbc\xd0\xbc\xd0\xb8\xd1\x81\xd1\x82", 22, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u041F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u0438\u0441\u0442");
     CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange->getString());
 }
 
@@ -366,7 +366,7 @@ DECLARE_RTFEXPORT_TEST(testFdo44211, "fdo44211.rtf")
 {
     uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
 
-    OUString aExpected("\xc4\x85\xc4\x8d\xc4\x99", 6, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u0105\u010D\u0119");
     CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange->getString());
 }
 
@@ -534,7 +534,7 @@ DECLARE_RTFEXPORT_TEST(testFdo36089, "fdo36089.rtf")
 
 DECLARE_RTFEXPORT_TEST(testFdo48446, "fdo48446.rtf")
 {
-    OUString aExpected("\xd0\x98\xd0\xbc\xd1\x8f", 6, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u0418\u043C\u044F");
     getParagraph(1, aExpected);
 }
 
@@ -683,7 +683,7 @@ DECLARE_RTFEXPORT_TEST(testFdo56512, "fdo56512.rtf")
     uno::Reference<text::XTextFramesSupplier> xTextFramesSupplier(mxComponent, uno::UNO_QUERY);
     uno::Reference<container::XIndexAccess> xIndexAccess(xTextFramesSupplier->getTextFrames(), uno::UNO_QUERY);
     uno::Reference<text::XTextRange> xTextRange(xIndexAccess->getByIndex(0), uno::UNO_QUERY);
-    OUString aExpected("\xd7\xa2\xd7\x95\xd7\xa1\xd7\xa7 \xd7\x9e\xd7\x95\xd7\xa8\xd7\xa9\xd7\x94 ", 20, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u05E2\u05D5\u05E1\u05E7 \u05DE\u05D5\u05E8\u05E9\u05D4 ");
     CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange->getString());
 }
 
@@ -828,7 +828,7 @@ DECLARE_RTFEXPORT_TEST(testFdo59638, "fdo59638.rtf")
         if (rProp.Name == "BulletChar")
         {
             // Was '*', should be 'o'.
-            CPPUNIT_ASSERT_EQUAL(OUString("\xEF\x82\xB7", 3, RTL_TEXTENCODING_UTF8), rProp.Value.get<OUString>());
+            CPPUNIT_ASSERT_EQUAL(OUString(u"\uF0B7"), rProp.Value.get<OUString>());
             return;
         }
     }
@@ -923,7 +923,7 @@ DECLARE_RTFEXPORT_TEST(testFdo42109, "fdo42109.rtf")
 DECLARE_RTFEXPORT_TEST(testFdo62977, "fdo62977.rtf")
 {
     // The middle character was imported as '?' instead of the proper unicode value.
-    getRun(getParagraph(1), 1, OUString("\xE5\xB9\xB4\xEF\xBC\x94\xE6\x9C\x88", 9, RTL_TEXTENCODING_UTF8));
+    getRun(getParagraph(1), 1, OUString(u"\u5E74\uFF14\u6708"));
 }
 
 DECLARE_RTFEXPORT_TEST(testN818997, "n818997.rtf")
@@ -935,7 +935,7 @@ DECLARE_RTFEXPORT_TEST(testN818997, "n818997.rtf")
 DECLARE_RTFEXPORT_TEST(testFdo64671, "fdo64671.rtf")
 {
     // Additional '}' was inserted before the special character.
-    getRun(getParagraph(1), 1, OUString("\xC5\xBD", 2, RTL_TEXTENCODING_UTF8));
+    getRun(getParagraph(1), 1, OUString(u"\u017D"));
 }
 
 DECLARE_RTFEXPORT_TEST(testFdo62044, "fdo62044.rtf")
@@ -1034,7 +1034,7 @@ DECLARE_RTFEXPORT_TEST(testFdo77996, "fdo77996.rtf")
     uno::Reference<document::XDocumentPropertiesSupplier> xDocumentPropertiesSupplier(mxComponent, uno::UNO_QUERY);
     uno::Reference<document::XDocumentProperties> xProps(xDocumentPropertiesSupplier->getDocumentProperties());
     CPPUNIT_ASSERT_EQUAL(OUString("Aln Lin (Bei Jing)"), xProps->getAuthor());
-    OUString aTitle("\xe5\x8e\xa6\xe9\x97\xa8\xe9\x92\xa8\xe4\xb8\x9a\xe8\x82\xa1\xe4\xbb\xbd\xe6\x9c\x89\xe9\x99\x90\xe5\x85\xac\xe5\x8f\xb8", 30, RTL_TEXTENCODING_UTF8);
+    OUString aTitle(u"\u53A6\u95E8\u94A8\u4E1A\u80A1\u4EFD\u6709\u9650\u516C\u53F8");
     CPPUNIT_ASSERT_EQUAL(aTitle, xProps->getTitle());
     uno::Reference<beans::XPropertySet> xUDProps(xProps->getUserDefinedProperties(), uno::UNO_QUERY);
     CPPUNIT_ASSERT_EQUAL(OUString("jay"), getProperty<OUString>(xUDProps, "Operator"));
@@ -1111,7 +1111,7 @@ DECLARE_RTFEXPORT_TEST(testFdo44715, "fdo44715.rtf")
 DECLARE_RTFEXPORT_TEST(testFdo68076, "fdo68076.rtf")
 {
     // Encoding of the last char was wrong (more 'o' than 'y').
-    OUString aExpected("\xD0\x9E\xD0\xB1\xD1\x8A\xD0\xB5\xD0\xBA\xD1\x82 \xE2\x80\x93 \xD1\x83", 19, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u041E\u0431\u044A\u0435\u043A\u0442 \u2013 \u0443");
     getParagraph(1, aExpected);
 }
 
@@ -1353,12 +1353,12 @@ DECLARE_RTFEXPORT_TEST(testDprectAnchor, "dprect-anchor.rtf")
 
 DECLARE_RTFEXPORT_TEST(testFdo76628, "fdo76628.rtf")
 {
-    OUString aExpected("\xd0\x9e\xd0\x91\xd0\xa0\xd0\x90\xd0\x97\xd0\x95\xd0\xa6", 14, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u041E\u0411\u0420\u0410\u0417\u0415\u0426");
     // Should be 'SAMPLE' in Russian, was garbage.
     getParagraph(1, aExpected);
 
     uno::Reference<text::XText> xHeaderText = getProperty< uno::Reference<text::XText> >(getStyles("PageStyles")->getByName("Standard"), "HeaderText");
-    OUString aExpectedHeader("\xd0\x9f\xd0\xbe\xd0\xb4\xd0\xb3\xd0\xbe\xd1\x82\xd0\xbe\xd0\xb2\xd0\xbb\xd0\xb5\xd0\xbd\xd0\xbe", 24, RTL_TEXTENCODING_UTF8);
+    OUString aExpectedHeader(u"\u041F\u043E\u0434\u0433\u043E\u0442\u043E\u0432\u043B\u0435\u043D\u043E");
     // Should be 'prepared' in Russian, was garbage.
     getParagraphOfText(1, xHeaderText, aExpectedHeader);
 }
@@ -1484,7 +1484,7 @@ DECLARE_RTFEXPORT_TEST(testFdo85889pc, "fdo85889-pc.rtf")
 {
     uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
 
-    OUString aExpected("\xc2\xb1\xe2\x89\xa5\xe2\x89\xa4", 8, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u00B1\u2265\u2264");
     CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange->getString());
 }
 
@@ -1492,7 +1492,7 @@ DECLARE_RTFEXPORT_TEST(testFdo85889pca, "fdo85889-pca.rtf")
 {
     uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
 
-    OUString aExpected("\xc2\xb1\xe2\x80\x97\xc2\xbe", 7, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u00B1\u2017\u00BE");
     CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange->getString());
 }
 
@@ -1500,7 +1500,7 @@ DECLARE_RTFEXPORT_TEST(testFdo85889mac, "fdo85889-mac.rtf")
 {
     uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
 
-    OUString aExpected("\xc3\x92\xc3\x9a\xc3\x9b", 6, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u00D2\u00DA\u00DB");
     CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange->getString());
 }
 
diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx b/sw/qa/extras/rtfimport/rtfimport.cxx
index a7fe074c478f..91bf92c81881 100644
--- a/sw/qa/extras/rtfimport/rtfimport.cxx
+++ b/sw/qa/extras/rtfimport/rtfimport.cxx
@@ -196,7 +196,7 @@ DECLARE_RTFIMPORT_TEST(testFdo45182, "fdo45182.rtf")
     uno::Reference<container::XIndexAccess> xFootnotes(xFootnotesSupplier->getFootnotes(), uno::UNO_QUERY);
     uno::Reference<text::XTextRange> xTextRange(xFootnotes->getByIndex(0), uno::UNO_QUERY);
     // Encoding in the footnote was wrong.
-    OUString aExpected("\xc5\xbeivnost\xc3\xad\n", 11, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"\u017Eivnost\u00ED\n");
     CPPUNIT_ASSERT_EQUAL(aExpected, xTextRange->getString());
 }
 
@@ -634,7 +634,7 @@ public:
     }
     virtual void verify() override
     {
-        OUString aExpected("\xd0\x98\xd0\xbc\xd1\x8f", 6, RTL_TEXTENCODING_UTF8);
+        OUString aExpected(u"\u0418\u043C\u044F");
         getParagraph(1, aExpected);
     }
 };
diff --git a/sw/qa/extras/uiwriter/uiwriter.cxx b/sw/qa/extras/uiwriter/uiwriter.cxx
index 3146016937ba..13e95242a7c5 100644
--- a/sw/qa/extras/uiwriter/uiwriter.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter.cxx
@@ -3839,7 +3839,7 @@ void SwUiWriterTest::testTdf89954()
     SwNodeIndex aNodeIndex(pDoc->GetNodes().GetEndOfContent(), -1);
     // Placeholder character for the comment anchor was ^A (CH_TXTATR_BREAKWORD), not <fff9> (CH_TXTATR_INWORD).
     // As a result, autocorrect did not turn the 't' input into 'T'.
-    OUString aExpected("Tes\xef\xbf\xb9t. Test.", 14, RTL_TEXTENCODING_UTF8);
+    OUString aExpected(u"Tes\uFFF9t. Test.");
     CPPUNIT_ASSERT_EQUAL(aExpected, aNodeIndex.GetNode().GetTextNode()->GetText());
 }
 


More information about the Libreoffice-commits mailing list