[Libreoffice-commits] online.git: common/Util.cpp common/Util.hpp kit/Kit.cpp loolwsd.xml.in test/WhiteBoxTests.cpp wsd/LOOLWSD.cpp wsd/LOOLWSD.hpp

Ashod Nakashian (via logerrit) logerrit at kemper.freedesktop.org
Sat Aug 17 02:26:55 UTC 2019


 common/Util.cpp        |   37 ++++++++++++++++---------
 common/Util.hpp        |    8 ++---
 kit/Kit.cpp            |   13 ++++++--
 loolwsd.xml.in         |    3 +-
 test/WhiteBoxTests.cpp |   72 +++++++++++++++++++++++++++++++++++--------------
 wsd/LOOLWSD.cpp        |    8 ++++-
 wsd/LOOLWSD.hpp        |    6 ++--
 7 files changed, 103 insertions(+), 44 deletions(-)

New commits:
commit 919a93cd4b123f3757ce1ae454ed622019d1d02c
Author:     Ashod Nakashian <ashod.nakashian at collabora.co.uk>
AuthorDate: Sun Apr 14 12:21:19 2019 -0400
Commit:     Ashod Nakashian <ashnakash at gmail.com>
CommitDate: Sat Aug 17 04:26:35 2019 +0200

    wsd: improved anonymization algorithm
    
    Better hashing algorithm based on FNV-1a.
    Adds support for salting the hash, and
    for providing salt via configuration.
    
    More unit-tests added, and better formatting.
    
    Change-Id: I2be42675d0cdbaa73c3d7faed99e07631a9c20fc
    Reviewed-on: https://gerrit.libreoffice.org/70034
    Reviewed-by: Ashod Nakashian <ashnakash at gmail.com>
    Tested-by: Ashod Nakashian <ashnakash at gmail.com>
    Reviewed-on: https://gerrit.libreoffice.org/71091

diff --git a/common/Util.cpp b/common/Util.cpp
index bb3983239..774b522db 100644
--- a/common/Util.cpp
+++ b/common/Util.cpp
@@ -304,16 +304,16 @@ namespace Util
         return true;
     }
 
-    std::string encodeId(const unsigned number, const int padding)
+    std::string encodeId(const std::uint64_t number, const int padding)
     {
         std::ostringstream oss;
         oss << std::hex << std::setw(padding) << std::setfill('0') << number;
         return oss.str();
     }
 
-    unsigned decodeId(const std::string& str)
+    std::uint64_t decodeId(const std::string& str)
     {
-        unsigned id = 0;
+        std::uint64_t id = 0;
         std::stringstream ss;
         ss << std::hex << str;
         ss >> id;
@@ -684,7 +684,7 @@ namespace Util
     }
 
     static std::map<std::string, std::string> AnonymizedStrings;
-    static std::atomic<unsigned> AnonymizationSalt(0);
+    static std::atomic<unsigned> AnonymizationCounter(0);
     static std::mutex AnonymizedMutex;
 
     void mapAnonymized(const std::string& plain, const std::string& anonymized)
@@ -701,7 +701,7 @@ namespace Util
         AnonymizedStrings[plain] = anonymized;
     }
 
-    std::string anonymize(const std::string& text)
+    std::string anonymize(const std::string& text, const std::uint64_t nAnonymizationSalt)
     {
         {
             std::unique_lock<std::mutex> lock(AnonymizedMutex);
@@ -716,15 +716,26 @@ namespace Util
             }
         }
 
-        // We just need something irreversible, short, and
-        // quite simple.
-        std::size_t hash = 0;
+        // Modified 64-bit FNV-1a to add salting.
+        // For the algorithm and the magic numbers, see http://isthe.com/chongo/tech/comp/fnv/
+        std::uint64_t hash = 0xCBF29CE484222325LL;
+        hash ^= nAnonymizationSalt;
+        hash *= 0x100000001b3ULL;
         for (const char c : text)
-            hash += c;
+        {
+            hash ^= static_cast<std::uint64_t>(c);
+            hash *= 0x100000001b3ULL;
+        }
+
+        hash ^= nAnonymizationSalt;
+        hash *= 0x100000001b3ULL;
 
         // Generate the anonymized string. The '#' is to hint that it's anonymized.
-        // Prepend with salt to make it unique, in case we get collisions (which we will, eventually).
-        const std::string res = '#' + Util::encodeId(AnonymizationSalt++, 0) + '#' + Util::encodeId(hash, 0) + '#';
+        // Prepend with count to make it unique within a single process instance,
+        // in case we get collisions (which we will, eventually). N.B.: Identical
+        // strings are likely to have different prefixes when logged in WSD process vs. Kit.
+        const std::string res
+            = '#' + Util::encodeId(AnonymizationCounter++, 0) + '#' + Util::encodeId(hash, 0) + '#';
         mapAnonymized(text, res);
         return res;
     }
@@ -739,7 +750,7 @@ namespace Util
         return filename;
     }
 
-    std::string anonymizeUrl(const std::string& url)
+    std::string anonymizeUrl(const std::string& url, const std::uint64_t nAnonymizationSalt)
     {
         std::string base;
         std::string filename;
@@ -747,7 +758,7 @@ namespace Util
         std::string params;
         std::tie(base, filename, ext, params) = Util::splitUrl(url);
 
-        return base + Util::anonymize(filename) + ext + params;
+        return base + Util::anonymize(filename, nAnonymizationSalt) + ext + params;
     }
 
     std::string getHttpTimeNow()
diff --git a/common/Util.hpp b/common/Util.hpp
index 9e01181fa..e16479f68 100644
--- a/common/Util.hpp
+++ b/common/Util.hpp
@@ -74,9 +74,9 @@ namespace Util
     /// Hex to unsigned char
     bool dataFromHexString(const std::string& hexString, std::vector<unsigned char>& data);
     /// Encode an integral ID into a string, with padding support.
-    std::string encodeId(const unsigned number, const int padding = 5);
+    std::string encodeId(const std::uint64_t number, const int padding = 5);
     /// Decode an integral ID from a string.
-    unsigned decodeId(const std::string& str);
+    std::uint64_t decodeId(const std::string& str);
 
     bool windowingAvailable();
 
@@ -779,14 +779,14 @@ int main(int argc, char**argv)
 
     /// Anonymize a sensitive string to avoid leaking it.
     /// Called on strings to be logged or exposed.
-    std::string anonymize(const std::string& text);
+    std::string anonymize(const std::string& text, const std::uint64_t nAnonymizationSalt);
 
     /// Sets the anonymized version of a given plain-text string.
     /// After this, 'anonymize(plain)' will return 'anonymized'.
     void mapAnonymized(const std::string& plain, const std::string& anonymized);
 
     /// Anonymize the basename of filenames only, preserving the path and extension.
-    std::string anonymizeUrl(const std::string& url);
+    std::string anonymizeUrl(const std::string& url, const std::uint64_t nAnonymizationSalt);
 
     /// Extract and return the filename given a url or path.
     std::string getFilenameFromURL(const std::string& url);
diff --git a/kit/Kit.cpp b/kit/Kit.cpp
index 425c0b5fb..75d208e64 100644
--- a/kit/Kit.cpp
+++ b/kit/Kit.cpp
@@ -113,6 +113,7 @@ class Document;
 static std::shared_ptr<Document> document;
 #ifndef BUILDING_TESTS
 static bool AnonymizeUserData = false;
+static uint64_t AnonymizationSalt = 82589933;
 static std::string ObfuscatedFileId;
 #endif
 
@@ -2467,7 +2468,13 @@ void lokit_main(
         LOG_INF("Setting log-level to [trace] and delaying setting to configured [" << LogLevel << "] until after Kit initialization.");
     }
 
-    AnonymizeUserData = std::getenv("LOOL_ANONYMIZE_USER_DATA") != nullptr;
+    const char* pAnonymizationSalt = std::getenv("LOOL_ANONYMIZATION_SALT");
+    if (pAnonymizationSalt)
+    {
+        AnonymizationSalt = std::stoull(std::string(pAnonymizationSalt));
+        AnonymizeUserData = true;
+    }
+
     LOG_INF("User-data anonymization is " << (AnonymizeUserData ? "enabled." : "disabled."));
 
     assert(!childRoot.empty());
@@ -2779,7 +2786,7 @@ void lokit_main(
 std::string anonymizeUrl(const std::string& url)
 {
 #ifndef BUILDING_TESTS
-    return AnonymizeUserData ? Util::anonymizeUrl(url) : url;
+    return AnonymizeUserData ? Util::anonymizeUrl(url, AnonymizationSalt) : url;
 #else
     return url;
 #endif
@@ -2873,7 +2880,7 @@ bool globalPreinit(const std::string &loTemplate)
 std::string anonymizeUsername(const std::string& username)
 {
 #ifndef BUILDING_TESTS
-    return AnonymizeUserData ? Util::anonymize(username) : username;
+    return AnonymizeUserData ? Util::anonymize(username, AnonymizationSalt) : username;
 #else
     return username;
 #endif
diff --git a/loolwsd.xml.in b/loolwsd.xml.in
index d9b6cf922..8475a23d3 100644
--- a/loolwsd.xml.in
+++ b/loolwsd.xml.in
@@ -53,7 +53,8 @@
             <property name="flush" desc="Enable/disable flushing after logging each line. May harm performance. Note that without flushing after each line, the log lines from the different processes will not appear in chronological order.">false</property>
         </file>
         <anonymize>
-            <anonymize_user_data type="bool" desc="Enable to anonymize/obfuscate of user data in logs. If default is true, it was forced at compile-time and cannot be disabled." default="@LOOLWSD_ANONYMIZE_USER_DATA@">@LOOLWSD_ANONYMIZE_USER_DATA@</anonymize_user_data>
+            <anonymize_user_data type="bool" desc="Enable to anonymize/obfuscate of user-data in logs. If default is true, it was forced at compile-time and cannot be disabled." default="@LOOLWSD_ANONYMIZE_USER_DATA@">@LOOLWSD_ANONYMIZE_USER_DATA@</anonymize_user_data>
+            <anonymization_salt type="uint" desc="The salt used to anonymize/obfuscate user-data in logs. Use a secret 64-bit random number." default="82589933">82589933</anonymization_salt>
         </anonymize>
     </logging>
 
diff --git a/test/WhiteBoxTests.cpp b/test/WhiteBoxTests.cpp
index 8c5bdc70c..39aa197e3 100644
--- a/test/WhiteBoxTests.cpp
+++ b/test/WhiteBoxTests.cpp
@@ -669,35 +669,67 @@ void WhiteBoxTests::testJson()
 void WhiteBoxTests::testAnonymization()
 {
     static const std::string name = "some name with space";
-    CPPUNIT_ASSERT_EQUAL(std::string("#0#77d#"), Util::anonymizeUrl(name));
+    static const std::string filename = "filename.ext";
+    static const std::string filenameTestx = "testx (6).odt";
+    static const std::string path = "/path/to/filename.ext";
+    static const std::string plainUrl
+        = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/"
+          "736_ocgdpzbkm39u?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&"
+          "permission=edit";
+    static const std::string fileUrl = "http://localhost/owncloud/index.php/apps/richdocuments/"
+                                       "wopi/files/736_ocgdpzbkm39u/"
+                                       "secret.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&"
+                                       "access_token_ttl=0&permission=edit";
+
+    std::uint64_t nAnonymizationSalt = 1111111111182589933;
+
+    CPPUNIT_ASSERT_EQUAL(std::string("#0#5e45aef91248a8aa#"),
+                         Util::anonymizeUrl(name, nAnonymizationSalt));
+    CPPUNIT_ASSERT_EQUAL(std::string("#1#8f8d95bd2a202d00#.odt"),
+                         Util::anonymizeUrl(filenameTestx, nAnonymizationSalt));
+    CPPUNIT_ASSERT_EQUAL(std::string("/path/to/#2#5c872b2d82ecc8a0#.ext"),
+                         Util::anonymizeUrl(path, nAnonymizationSalt));
+    CPPUNIT_ASSERT_EQUAL(
+        std::string("http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/"
+                    "#3#22c6f0caad277666#?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_"
+                    "token_ttl=0&permission=edit"),
+        Util::anonymizeUrl(plainUrl, nAnonymizationSalt));
+    CPPUNIT_ASSERT_EQUAL(
+        std::string("http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/"
+                    "736_ocgdpzbkm39u/"
+                    "#4#294f0dfb18f6a80b#.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_"
+                    "token_ttl=0&permission=edit"),
+        Util::anonymizeUrl(fileUrl, nAnonymizationSalt));
+
+    nAnonymizationSalt = 0;
+
+    CPPUNIT_ASSERT_EQUAL(std::string("#0#5e45aef91248a8aa#"), Util::anonymizeUrl(name, nAnonymizationSalt));
     Util::mapAnonymized(name, name);
-    CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name));
+    CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name, nAnonymizationSalt));
 
-    static const std::string filename = "filename.ext";
-    CPPUNIT_ASSERT_EQUAL(std::string("#1#341#.ext"), Util::anonymizeUrl(filename));
-    Util::mapAnonymized("filename", "filename");
-    CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name));
+    CPPUNIT_ASSERT_EQUAL(std::string("#2#5c872b2d82ecc8a0#.ext"),
+                         Util::anonymizeUrl(filename, nAnonymizationSalt));
+    Util::mapAnonymized("filename", "filename"); // Identity map of the filename without extension.
+    CPPUNIT_ASSERT_EQUAL(filename, Util::anonymizeUrl(filename, nAnonymizationSalt));
 
-    static const std::string filenameTestx = "testx (6).odt";
-    CPPUNIT_ASSERT_EQUAL(std::string("#2#2df#.odt"), Util::anonymizeUrl(filenameTestx));
-    Util::mapAnonymized("testx (6)", "testx (6)");
-    CPPUNIT_ASSERT_EQUAL(filenameTestx, Util::anonymizeUrl(filenameTestx));
+    CPPUNIT_ASSERT_EQUAL(std::string("#1#8f8d95bd2a202d00#.odt"),
+                         Util::anonymizeUrl(filenameTestx, nAnonymizationSalt));
+    Util::mapAnonymized("testx (6)",
+                        "testx (6)"); // Identity map of the filename without extension.
+    CPPUNIT_ASSERT_EQUAL(filenameTestx, Util::anonymizeUrl(filenameTestx, nAnonymizationSalt));
 
-    static const std::string path = "/path/to/filename.ext";
-    CPPUNIT_ASSERT_EQUAL(path, Util::anonymizeUrl(path));
+    CPPUNIT_ASSERT_EQUAL(path, Util::anonymizeUrl(path, nAnonymizationSalt));
 
-    static const std::string plainUrl = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/736_ocgdpzbkm39u?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&permission=edit";
-    const std::string urlAnonymized = Util::replace(plainUrl, "736_ocgdpzbkm39u", "#3#5a1#");
-    CPPUNIT_ASSERT_EQUAL(urlAnonymized, Util::anonymizeUrl(plainUrl));
+    const std::string urlAnonymized = Util::replace(plainUrl, "736_ocgdpzbkm39u", "#3#22c6f0caad277666#");
+    CPPUNIT_ASSERT_EQUAL(urlAnonymized, Util::anonymizeUrl(plainUrl, nAnonymizationSalt));
     Util::mapAnonymized("736_ocgdpzbkm39u", "736_ocgdpzbkm39u");
-    CPPUNIT_ASSERT_EQUAL(plainUrl, Util::anonymizeUrl(plainUrl));
+    CPPUNIT_ASSERT_EQUAL(plainUrl, Util::anonymizeUrl(plainUrl, nAnonymizationSalt));
 
-    static const std::string fileUrl = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/736_ocgdpzbkm39u/secret.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&permission=edit";
-    const std::string urlAnonymized2 = Util::replace(fileUrl, "secret", "#4#286#");
-    CPPUNIT_ASSERT_EQUAL(urlAnonymized2, Util::anonymizeUrl(fileUrl));
+    const std::string urlAnonymized2 = Util::replace(fileUrl, "secret", "#4#294f0dfb18f6a80b#");
+    CPPUNIT_ASSERT_EQUAL(urlAnonymized2, Util::anonymizeUrl(fileUrl, nAnonymizationSalt));
     Util::mapAnonymized("secret", "736_ocgdpzbkm39u");
     const std::string urlAnonymized3 = Util::replace(fileUrl, "secret", "736_ocgdpzbkm39u");
-    CPPUNIT_ASSERT_EQUAL(urlAnonymized3, Util::anonymizeUrl(fileUrl));
+    CPPUNIT_ASSERT_EQUAL(urlAnonymized3, Util::anonymizeUrl(fileUrl, nAnonymizationSalt));
 }
 
 CPPUNIT_TEST_SUITE_REGISTRATION(WhiteBoxTests);
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index f1878f9a8..dd62d2929 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -709,6 +709,7 @@ std::string LOOLWSD::ConfigFile = LOOLWSD_CONFIGDIR "/loolwsd.xml";
 std::string LOOLWSD::ConfigDir = LOOLWSD_CONFIGDIR "/conf.d";
 std::string LOOLWSD::LogLevel = "trace";
 bool LOOLWSD::AnonymizeUserData = false;
+std::uint64_t LOOLWSD::AnonymizationSalt = 82589933;
 Util::RuntimeConstant<bool> LOOLWSD::SSLEnabled;
 Util::RuntimeConstant<bool> LOOLWSD::SSLTermination;
 unsigned LOOLWSD::MaxConnections;
@@ -991,7 +992,12 @@ void LOOLWSD::initialize(Application& self)
 
     LOG_INF("Anonymization of user-data is " << (AnonymizeUserData ? "enabled." : "disabled."));
     if (AnonymizeUserData)
-        setenv("LOOL_ANONYMIZE_USER_DATA", "1", true);
+    {
+        // Get the salt, if set, otherwise default, and set as envar, so the kits inherit it.
+        AnonymizationSalt = getConfigValue<std::uint64_t>(conf, "logging.anonymize.anonymization_salt", 82589933);
+        const std::string sAnonymizationSalt = std::to_string(AnonymizationSalt);
+        setenv("LOOL_ANONYMIZATION_SALT", sAnonymizationSalt.c_str(), true);
+    }
 
     {
         std::string proto = getConfigValue<std::string>(conf, "net.proto", "");
diff --git a/wsd/LOOLWSD.hpp b/wsd/LOOLWSD.hpp
index 011aca503..2e6c68e70 100644
--- a/wsd/LOOLWSD.hpp
+++ b/wsd/LOOLWSD.hpp
@@ -67,6 +67,7 @@ public:
     static std::string HostIdentifier; ///< A unique random hash that identifies this server
     static std::string LogLevel;
     static bool AnonymizeUserData;
+    static std::uint64_t AnonymizationSalt;
     static std::atomic<unsigned> NumConnections;
     static std::unique_ptr<TraceFileWriter> TraceDumper;
 #if !MOBILEAPP
@@ -154,14 +155,14 @@ public:
     /// Anonymize the basename of filenames, preserving the path and extension.
     static std::string anonymizeUrl(const std::string& url)
     {
-        return AnonymizeUserData ? Util::anonymizeUrl(url) : url;
+        return AnonymizeUserData ? Util::anonymizeUrl(url, AnonymizationSalt) : url;
     }
 
     /// Anonymize user names and IDs.
     /// Will use the Obfuscated User ID if one is provied via WOPI.
     static std::string anonymizeUsername(const std::string& username)
     {
-        return AnonymizeUserData ? Util::anonymize(username) : username;
+        return AnonymizeUserData ? Util::anonymize(username, AnonymizationSalt) : username;
     }
 
     /// get correct server URL with protocol + port number for this running server
@@ -202,6 +203,7 @@ private:
 
         void operator()(int& value) { value = _config.getInt(_name); }
         void operator()(unsigned int& value) { value = _config.getUInt(_name); }
+        void operator()(unsigned long& value) { value = _config.getUInt64(_name); }
         void operator()(bool& value) { value = _config.getBool(_name); }
         void operator()(std::string& value) { value = _config.getString(_name); }
         void operator()(double& value) { value = _config.getDouble(_name); }


More information about the Libreoffice-commits mailing list