[Libreoffice-commits] core.git: include/tools tools/qa tools/source

Jochen Nitschke j.nitschke+logerrit at ok.de
Wed Apr 26 07:16:46 UTC 2017


 include/tools/inetmime.hxx         |   21 +++++++
 tools/qa/cppunit/test_inetmime.cxx |  103 ++++++++++++++++++++++++++++++++++---
 tools/source/inet/inetmime.cxx     |   21 ++++---
 3 files changed, 128 insertions(+), 17 deletions(-)

New commits:
commit bef9fe6e3decc92bdcec6415b1898e4a0202cc6a
Author: Jochen Nitschke <j.nitschke+logerrit at ok.de>
Date:   Tue Apr 18 15:23:56 2017 +0200

    extend unit test for INetMIME::scanContentType
    
    This reverts parts of commit 631b67952909a73ba1851417bd2edbe02ad7be5a
    and commit abc6071b7a8af354a56c91e4caecd8afc79f55cc.
    
    some of the removed fields are useful,
    m_bConverted should be checked by callers
    
    fixed 2 bugs and added test cases:
    * extended attributes with more than 2 sections were not parsed
    * extended attributes with more than 1 section were not parsed
      if there was another attribute
    
    Change-Id: I61ab2af7c5151ef1bcd80cc159fa2b99559374a8
    Reviewed-on: https://gerrit.libreoffice.org/36913
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Stephan Bergmann <sbergman at redhat.com>

diff --git a/include/tools/inetmime.hxx b/include/tools/inetmime.hxx
index e2c58b5ea056..cd66173a8d6c 100644
--- a/include/tools/inetmime.hxx
+++ b/include/tools/inetmime.hxx
@@ -30,6 +30,16 @@
 
 struct INetContentTypeParameter
 {
+    /** The optional character set specification (see RFC 2231), in US-ASCII
+        encoding and converted to lower case.
+     */
+    OString m_sCharset;
+
+    /** The optional language specification (see RFC 2231), in US-ASCII
+        encoding and converted to lower case.
+     */
+    OString m_sLanguage;
+
     /** The attribute value.  If the value is a quoted-string, it is
         'unpacked.'  If a character set is specified, and the value can be
         converted to Unicode, this is done.  Also, if no character set is
@@ -49,9 +59,18 @@ struct INetContentTypeParameter
      */
     OUString m_sValue;
 
+    /** This is true if the value is successfully converted to Unicode, and
+        false if the value is a special mixture of ISO-LATIN-1 characters and
+        characters from Unicode's Private Use Area.
+     */
+    bool m_bConverted;
 };
 
-// the key is the m_sAttribute again; all keys are lower case:
+/** The key is the name of the attribute, in US-ASCII encoding and converted
+    to lower case.  If a parameter value is split as described in RFC 2231,
+    there will only be one item for the complete parameter, with the attribute
+    name lacking any section suffix.
+ */
 typedef std::unordered_map<OString, INetContentTypeParameter, OStringHash>
     INetContentTypeParameterList;
 
diff --git a/tools/qa/cppunit/test_inetmime.cxx b/tools/qa/cppunit/test_inetmime.cxx
index 1a5d16b5aba6..8b953526778c 100644
--- a/tools/qa/cppunit/test_inetmime.cxx
+++ b/tools/qa/cppunit/test_inetmime.cxx
@@ -35,11 +35,13 @@ namespace
     public:
         void test_decodeHeaderFieldBody();
 
-        void test_scanContentType();
+        void test_scanContentType_basic();
+        void test_scanContentType_rfc2231();
 
         CPPUNIT_TEST_SUITE(Test);
         CPPUNIT_TEST(test_decodeHeaderFieldBody);
-        CPPUNIT_TEST(test_scanContentType);
+        CPPUNIT_TEST(test_scanContentType_basic);
+        CPPUNIT_TEST(test_scanContentType_rfc2231);
         CPPUNIT_TEST_SUITE_END();
     };
 
@@ -56,17 +58,61 @@ namespace
         CPPUNIT_ASSERT(testDecode("=?iso-8859-1?B?QUJD?=", "ABC"));
     }
 
-    void Test::test_scanContentType()
+    void Test::test_scanContentType_basic()
     {
         {
             OUString input
-                = "TEST/subTST; parm1*0*=US-ASCII'En'5%25%20; Parm1*1*=of%2010";
+                = "TEST/subTST; parm1=Value1; Parm2=\"unpacked value; %20\"";
+            // Just scan input for valid string:
+            auto end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength());
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            // Scan input and parse type, subType and parameters:
+            OUString type;
+            OUString subType;
+            INetContentTypeParameterList parameters;
+            end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(),
+                                            &type, &subType, &parameters);
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            CPPUNIT_ASSERT_EQUAL(OUString("test"), type);
+            CPPUNIT_ASSERT_EQUAL(OUString("subtst"), subType);
+            CPPUNIT_ASSERT_EQUAL(
+                INetContentTypeParameterList::size_type(2), parameters.size());
+            auto i = parameters.find("parm1");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sLanguage);
+            CPPUNIT_ASSERT_EQUAL(OUString("Value1"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+            i = parameters.find("parm2");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sLanguage);
+            CPPUNIT_ASSERT_EQUAL(OUString("unpacked value; %20"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+        }
+    }
+
+    void Test::test_scanContentType_rfc2231()
+    {
+        {
+            // Test extended parameter with value split in 3 sections:
+            OUString input
+                = "TEST/subTST; "
+                  "parm1*0*=US-ASCII'En'5%25%20; "
+                  "Parm1*1*=of%2010;\t"
+                  "parm1*2*=%20%3d%200.5";
+            // Just scan input for valid string:
+            auto end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength());
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            // Scan input and parse type, subType and parameters:
             OUString type;
             OUString subType;
             INetContentTypeParameterList parameters;
-            auto end = INetMIME::scanContentType(
-                input.getStr(), input.getStr() + input.getLength(), &type,
-                &subType, &parameters);
+            end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(),
+                                            &type, &subType, &parameters);
             CPPUNIT_ASSERT(end != nullptr);
             CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
             CPPUNIT_ASSERT_EQUAL(OUString("test"), type);
@@ -75,7 +121,48 @@ namespace
                 INetContentTypeParameterList::size_type(1), parameters.size());
             auto i = parameters.find("parm1");
             CPPUNIT_ASSERT(i != parameters.end());
-            CPPUNIT_ASSERT_EQUAL(OUString("5% of 10"), i->second.m_sValue);
+            CPPUNIT_ASSERT_EQUAL(OString("us-ascii"), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage);
+            CPPUNIT_ASSERT_EQUAL(OUString("5% of 10 = 0.5"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+
+            // Test extended parameters with different value charsets:
+            input = "TEST/subTST;"
+                    "parm1*0*=us-ascii'en'value;PARM1*1*=1;"
+                    "parm2*0*=WINDOWS-1250'en-GB'value2%20%80;"
+                    "parm3*0*=UNKNOWN'EN'value3";
+            // Just scan input for valid string:
+            end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength());
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            // Scan input and parse type, subType and parameters:
+            end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(),
+                                            &type, &subType, &parameters);
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            CPPUNIT_ASSERT_EQUAL(OUString("test"), type);
+            CPPUNIT_ASSERT_EQUAL(OUString("subtst"), subType);
+            CPPUNIT_ASSERT_EQUAL(
+                INetContentTypeParameterList::size_type(3), parameters.size());
+            i = parameters.find("parm1");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString("us-ascii"), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage);
+            CPPUNIT_ASSERT_EQUAL(OUString("value1"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+            i = parameters.find("parm2");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString("windows-1250"), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString("en-gb"), i->second.m_sLanguage);
+            // Euro currency sign, windows-1250 x80 is converted to unicode u20AC:
+            CPPUNIT_ASSERT_EQUAL(OUString(u"value2 \u20AC"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+            i = parameters.find("parm3");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString("unknown"), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage);
+            // Conversion fails for unknown charsets:
+            CPPUNIT_ASSERT(!i->second.m_bConverted);
         }
     }
 
diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx
index 35afc28b73cb..e95ebd56ef76 100644
--- a/tools/source/inet/inetmime.cxx
+++ b/tools/source/inet/inetmime.cxx
@@ -369,12 +369,14 @@ struct Parameter
     Parameter * m_pNext;
     OString m_aAttribute;
     OString m_aCharset;
+    OString m_aLanguage;
     OString m_aValue;
     sal_uInt32 m_nSection;
     bool m_bExtended;
 
     inline Parameter(Parameter * pTheNext, const OString& rTheAttribute,
                      const OString& rTheCharset,
+                     const OString& rTheLanguage,
                      const OString& rTheValue, sal_uInt32 nTheSection,
                      bool bTheExtended);
 };
@@ -382,11 +384,13 @@ struct Parameter
 inline Parameter::Parameter(Parameter * pTheNext,
                             const OString& rTheAttribute,
                             const OString& rTheCharset,
+                            const OString& rTheLanguage,
                             const OString& rTheValue,
                             sal_uInt32 nTheSection, bool bTheExtended):
     m_pNext(pTheNext),
     m_aAttribute(rTheAttribute),
     m_aCharset(rTheCharset),
+    m_aLanguage(rTheLanguage),
     m_aValue(rTheValue),
     m_nSection(nTheSection),
     m_bExtended(bTheExtended)
@@ -439,16 +443,16 @@ Parameter ** ParameterList::find(const OString& rAttribute,
     for (; *p; p = &(*p)->m_pNext)
     {
         sal_Int32 nCompare = rAttribute.compareTo((*p)->m_aAttribute);
-        if (nCompare > 0)
-            return &(*p)->m_pNext;
+        if (nCompare < 0)
+            break;
         else if (nCompare == 0)
         {
-            if (nSection > (*p)->m_nSection)
-                return &(*p)->m_pNext;
+            if (nSection < (*p)->m_nSection)
+                break;
             else if (nSection == (*p)->m_nSection)
             {
                 rPresent = true;
-                return p;
+                break;
             }
         }
     }
@@ -537,8 +541,9 @@ bool parseParameters(ParameterList const & rInput,
                         break;
                 };
             }
-            INetContentTypeParameter x {aValue}; // workaround ICE in VisualStudio2013
-            auto const ret = pOutput->insert({p->m_aAttribute, x });
+            auto const ret = pOutput->insert(
+                {p->m_aAttribute,
+                 {p->m_aCharset, p->m_aLanguage, aValue, !bBadEncoding}});
             SAL_INFO_IF(!ret.second, "tools",
                 "INetMIME: dropping duplicate parameter: " << p->m_aAttribute);
             p = pNext;
@@ -877,7 +882,7 @@ sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
                     RTL_TEXTENCODING_UTF8);
         }
 
-        *pPos = new Parameter(*pPos, aAttribute, aCharset, aValue,
+        *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
                               nSection, bExtended);
     }
     return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;


More information about the Libreoffice-commits mailing list