[Libreoffice-commits] online.git: Branch 'distro/collabora/collabora-online-cd-3-2' - common/Session.cpp common/Util.cpp common/Util.hpp kit/Kit.cpp wsd/DocumentBroker.cpp wsd/LOOLWSD.cpp wsd/LOOLWSD.hpp wsd/Storage.cpp

Ashod Nakashian ashod.nakashian at collabora.co.uk
Thu Jul 12 21:51:26 UTC 2018


 common/Session.cpp     |    4 ++++
 common/Util.cpp        |   32 ++++++++++++++++++++++++++++----
 common/Util.hpp        |    7 +++++++
 kit/Kit.cpp            |    8 ++++----
 wsd/DocumentBroker.cpp |    9 ++++-----
 wsd/LOOLWSD.cpp        |    3 ++-
 wsd/LOOLWSD.hpp        |    6 +-----
 wsd/Storage.cpp        |    8 ++++++++
 8 files changed, 58 insertions(+), 19 deletions(-)

New commits:
commit 3ee643a6f3e24951dc2748a2adceea646d125d35
Author: Ashod Nakashian <ashod.nakashian at collabora.co.uk>
Date:   Tue Jul 10 23:09:27 2018 -0400

    wsd: anonymize filename by using the WOPI file ID
    
    Change-Id: I869cae3846c8630b192246bc68cc90e70c50d1fd
    Reviewed-on: https://gerrit.libreoffice.org/57254
    Reviewed-by: Jan Holesovsky <kendy at collabora.com>
    Tested-by: Jan Holesovsky <kendy at collabora.com>

diff --git a/common/Session.cpp b/common/Session.cpp
index dc538ca89..7e174e2e3 100644
--- a/common/Session.cpp
+++ b/common/Session.cpp
@@ -163,6 +163,10 @@ void Session::parseDocOptions(const std::vector<std::string>& tokens, int& part,
         }
     }
 
+    Util::mapAnonymized(_userId, _userIdAnonym);
+    Util::mapAnonymized(_userName, _userNameAnonym);
+    Util::mapAnonymized(_jailedFilePath, _jailedFilePathAnonym);
+
     if (tokens.size() > offset)
     {
         if (getTokenString(tokens[offset], "options", _docOptions))
diff --git a/common/Util.cpp b/common/Util.cpp
index 30fbc20fe..b481d9e88 100644
--- a/common/Util.cpp
+++ b/common/Util.cpp
@@ -545,12 +545,26 @@ namespace Util
 
     static std::map<std::string, std::string> AnonymizedStrings;
     static std::atomic<unsigned> AnonymizationSalt(0);
+    static std::mutex AnonymizedMutex;
+
+    void mapAnonymized(const std::string& plain, const std::string& anonymized)
+    {
+        LOG_TRC("Anonymizing [" << plain << "] -> [" << anonymized << "].");
+
+        std::unique_lock<std::mutex> lock(AnonymizedMutex);
+
+        AnonymizedStrings[plain] = anonymized;
+    }
 
     std::string anonymize(const std::string& text)
     {
-        const auto it = AnonymizedStrings.find(text);
-        if (it != AnonymizedStrings.end())
-            return it->second;
+        {
+            std::unique_lock<std::mutex> lock(AnonymizedMutex);
+
+            const auto it = AnonymizedStrings.find(text);
+            if (it != AnonymizedStrings.end())
+                return it->second;
+        }
 
         // We just need something irreversible, short, and
         // quite simple.
@@ -561,7 +575,7 @@ namespace Util
         // Generate the anonymized string. The '#' is to hint that it's anonymized.
         // Prepend with salt to make it unique, in case we get collisions (which we will, eventually).
         const std::string res = '#' + Util::encodeId(AnonymizationSalt++, 0) + '#' + Util::encodeId(hash, 0) + '#';
-        AnonymizedStrings[text] = res;
+        mapAnonymized(text, res);
         return res;
     }
 
@@ -582,6 +596,16 @@ namespace Util
         return Util::anonymize(basename) + ext;
     }
 
+    std::string getFilenameFromPath(const std::string& path)
+    {
+        const std::size_t mid = path.find_last_of('/');
+        if (mid != std::string::npos)
+            return path.substr(mid + 1);
+
+        // No path, treat as filename only.
+        return path;
+    }
+
     std::string anonymizeUrl(const std::string& url)
     {
         const std::size_t mid = url.find_last_of('/');
diff --git a/common/Util.hpp b/common/Util.hpp
index c0baf188c..d8717b234 100644
--- a/common/Util.hpp
+++ b/common/Util.hpp
@@ -254,9 +254,16 @@ namespace Util
     /// Called on strings to be logged or exposed.
     std::string anonymize(const std::string& text);
 
+    /// Sets the anonymized version of a given plain-text string.
+    /// After this, 'anonymize(plain)' will return 'anonymized'.
+    void mapAnonymized(const std::string& plain, const std::string& anonymized);
+
     /// Anonymize the basename of filenames only, preserving the path and extension.
     std::string anonymizeUrl(const std::string& url);
 
+    /// Extract and return the filename given a path (i.e. the token after last '/').
+    std::string getFilenameFromPath(const std::string& path);
+
     /// Given one or more patterns to allow, and one or more to deny,
     /// the match member will return true if, and only if, the subject
     /// matches the allowed list, but not the deny.
diff --git a/kit/Kit.cpp b/kit/Kit.cpp
index 9b5ef8f48..3c70a8086 100644
--- a/kit/Kit.cpp
+++ b/kit/Kit.cpp
@@ -109,6 +109,7 @@ static LokHookFunction2* initFunction = nullptr;
 #ifndef BUILDING_TESTS
 static bool AnonymizeFilenames = false;
 static bool AnonymizeUsernames = false;
+static std::string ObfuscatedFileId;
 static std::string ObfuscatedUserId;
 #endif
 
@@ -2240,8 +2241,8 @@ void lokit_main(const std::string& childRoot,
                         const std::string& sessionId = tokens[1];
                         const std::string& docKey = tokens[2];
                         const std::string& docId = tokens[3];
-                        if (tokens.size() > 4)
-                            ObfuscatedUserId = tokens[4];
+                        const std::string fileId = Util::getFilenameFromPath(docKey);
+                        Util::mapAnonymized(fileId, fileId); // Identity mapping, since fileId is already obfuscated
 
                         std::string url;
                         URI::decode(docKey, url);
@@ -2428,8 +2429,7 @@ std::string anonymizeUrl(const std::string& url)
 std::string anonymizeUsername(const std::string& username)
 {
 #ifndef BUILDING_TESTS
-    if (AnonymizeUsernames)
-        return !ObfuscatedUserId.empty() ? ObfuscatedUserId : Util::anonymize(username);
+    return AnonymizeUsernames ? Util::anonymize(username) : username;
 #endif
     return username;
 }
diff --git a/wsd/DocumentBroker.cpp b/wsd/DocumentBroker.cpp
index 6ad82d6b3..a0ec6a64c 100644
--- a/wsd/DocumentBroker.cpp
+++ b/wsd/DocumentBroker.cpp
@@ -466,7 +466,6 @@ bool DocumentBroker::load(const std::shared_ptr<ClientSession>& session, const s
     {
         std::unique_ptr<WopiStorage::WOPIFileInfo> wopifileinfo = wopiStorage->getWOPIFileInfo(session->getAuthorization());
         userId = wopifileinfo->_userId;
-        LOOLWSD::ObfuscatedUserId = wopifileinfo->_obfuscatedUserId;
         username = wopifileinfo->_username;
         userExtraInfo = wopifileinfo->_userExtraInfo;
         watermarkText = wopifileinfo->_watermarkText;
@@ -1050,7 +1049,7 @@ size_t DocumentBroker::addSessionInternal(const std::shared_ptr<ClientSession>&
     const auto id = session->getId();
 
     // Request a new session from the child kit.
-    const std::string aMessage = "session " + id + ' ' + _docKey + ' ' + _docId + ' ' + LOOLWSD::ObfuscatedUserId;
+    const std::string aMessage = "session " + id + ' ' + _docKey + ' ' + _docId;
     _childProcess->sendTextFrame(aMessage);
 
     // Tell the admin console about this new doc
@@ -1657,7 +1656,7 @@ void DocumentBroker::dumpState(std::ostream& os)
     uint64_t sent, recv;
     getIOStats(sent, recv);
 
-    os << " Broker: " << _filename << " pid: " << getPid();
+    os << " Broker: " << LOOLWSD::anonymizeUrl(_filename) << " pid: " << getPid();
     if (_markToDestroy)
         os << " *** Marked to destroy ***";
     else
@@ -1670,9 +1669,9 @@ void DocumentBroker::dumpState(std::ostream& os)
     os << "\n  recv: " << recv;
     os << "\n  modified?: " << _isModified;
     os << "\n  jail id: " << _jailId;
-    os << "\n  filename: " << _filename;
+    os << "\n  filename: " << LOOLWSD::anonymizeUrl(_filename);
     os << "\n  public uri: " << _uriPublic.toString();
-    os << "\n  jailed uri: " << _uriJailed;
+    os << "\n  jailed uri: " << LOOLWSD::anonymizeUrl(_uriJailed);
     os << "\n  doc key: " << _docKey;
     os << "\n  doc id: " << _docId;
     os << "\n  num sessions: " << _sessions.size();
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index e05134068..0674cd3ae 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -589,7 +589,6 @@ std::string LOOLWSD::ConfigDir = LOOLWSD_CONFIGDIR "/conf.d";
 std::string LOOLWSD::LogLevel = "trace";
 bool LOOLWSD::AnonymizeFilenames = false;
 bool LOOLWSD::AnonymizeUsernames = false;
-std::string LOOLWSD::ObfuscatedUserId;
 Util::RuntimeConstant<bool> LOOLWSD::SSLEnabled;
 Util::RuntimeConstant<bool> LOOLWSD::SSLTermination;
 std::set<std::string> LOOLWSD::EditFileExtensions;
@@ -2352,6 +2351,8 @@ private:
 
             const auto uriPublic = DocumentBroker::sanitizeURI(url);
             const auto docKey = DocumentBroker::getDocKey(uriPublic);
+            const std::string fileId = Util::getFilenameFromPath(docKey);
+            Util::mapAnonymized(fileId, fileId); // Identity mapping, since fileId is already obfuscated
             LOG_INF("Sanitized URI [" << LOOLWSD::anonymizeUrl(url) << "] to [" << LOOLWSD::anonymizeUrl(uriPublic.toString()) <<
                     "] and mapped to docKey [" << LOOLWSD::anonymizeUrl(docKey) << "] for session [" << _id << "].");
 
diff --git a/wsd/LOOLWSD.hpp b/wsd/LOOLWSD.hpp
index d3c026375..536f8d051 100644
--- a/wsd/LOOLWSD.hpp
+++ b/wsd/LOOLWSD.hpp
@@ -61,7 +61,6 @@ public:
     static std::string LogLevel;
     static bool AnonymizeFilenames;
     static bool AnonymizeUsernames;
-    static std::string ObfuscatedUserId;
     static std::atomic<unsigned> NumConnections;
     static bool TileCachePersistent;
     static std::unique_ptr<TraceFileWriter> TraceDumper;
@@ -155,10 +154,7 @@ public:
     /// Will use the Obfuscated User ID if one is provied via WOPI.
     static std::string anonymizeUsername(const std::string& username)
     {
-        if (AnonymizeUsernames)
-            return !ObfuscatedUserId.empty() ? ObfuscatedUserId : Util::anonymize(username);
-
-        return username;
+        return AnonymizeUsernames ? Util::anonymize(username) : username;
     }
 
 protected:
diff --git a/wsd/Storage.cpp b/wsd/Storage.cpp
index 1b8c2fa7b..f71e33455 100644
--- a/wsd/Storage.cpp
+++ b/wsd/Storage.cpp
@@ -470,7 +470,15 @@ std::unique_ptr<WopiStorage::WOPIFileInfo> WopiStorage::getWOPIFileInfo(const Au
         // Anonymize key values.
         if (LOOLWSD::AnonymizeFilenames || LOOLWSD::AnonymizeUsernames)
         {
+            Util::mapAnonymized(filename, Util::getFilenameFromPath(_uri.toString()));
+
             JsonUtil::findJSONValue(object, "ObfuscatedUserId", obfuscatedUserId, false);
+            if (!obfuscatedUserId.empty())
+            {
+                Util::mapAnonymized(ownerId, obfuscatedUserId);
+                Util::mapAnonymized(userId, obfuscatedUserId);
+                Util::mapAnonymized(userName, obfuscatedUserId);
+            }
 
             // Set anonymized version of the above fields before logging.
             // Note: anonymization caches the result, so we don't need to store here.


More information about the Libreoffice-commits mailing list