[Libreoffice-commits] online.git: Branch 'distro/collabora/collabora-online-4' - common/Util.cpp common/Util.hpp kit/Kit.cpp loolwsd.xml.in test/WhiteBoxTests.cpp wsd/LOOLWSD.cpp wsd/LOOLWSD.hpp
Libreoffice Gerrit user
logerrit at kemper.freedesktop.org
Sun Apr 14 18:48:25 UTC 2019
common/Util.cpp | 37 ++++++++++++++++---------
common/Util.hpp | 8 ++---
kit/Kit.cpp | 13 ++++++--
loolwsd.xml.in | 3 +-
test/WhiteBoxTests.cpp | 72 +++++++++++++++++++++++++++++++++++--------------
wsd/LOOLWSD.cpp | 8 ++++-
wsd/LOOLWSD.hpp | 6 ++--
7 files changed, 103 insertions(+), 44 deletions(-)
New commits:
commit 75b9ae0b41f26916833b67f5cd7e574be034de92
Author: Ashod Nakashian <ashod.nakashian at collabora.co.uk>
AuthorDate: Sun Apr 14 12:21:19 2019 -0400
Commit: Ashod Nakashian <ashnakash at gmail.com>
CommitDate: Sun Apr 14 20:48:07 2019 +0200
wsd: improved anonymization algorithm
Better hashing algorithm based on FNV-1a.
Adds support for salting the hash, and
for providing salt via configuration.
More unit-tests added, and better formatting.
Change-Id: I2be42675d0cdbaa73c3d7faed99e07631a9c20fc
Reviewed-on: https://gerrit.libreoffice.org/70034
Reviewed-by: Ashod Nakashian <ashnakash at gmail.com>
Tested-by: Ashod Nakashian <ashnakash at gmail.com>
diff --git a/common/Util.cpp b/common/Util.cpp
index 048526a49..a27ed5a22 100644
--- a/common/Util.cpp
+++ b/common/Util.cpp
@@ -274,16 +274,16 @@ namespace Util
return true;
}
- std::string encodeId(const unsigned number, const int padding)
+ std::string encodeId(const std::uint64_t number, const int padding)
{
std::ostringstream oss;
oss << std::hex << std::setw(padding) << std::setfill('0') << number;
return oss.str();
}
- unsigned decodeId(const std::string& str)
+ std::uint64_t decodeId(const std::string& str)
{
- unsigned id = 0;
+ std::uint64_t id = 0;
std::stringstream ss;
ss << std::hex << str;
ss >> id;
@@ -651,7 +651,7 @@ namespace Util
}
static std::map<std::string, std::string> AnonymizedStrings;
- static std::atomic<unsigned> AnonymizationSalt(0);
+ static std::atomic<unsigned> AnonymizationCounter(0);
static std::mutex AnonymizedMutex;
void mapAnonymized(const std::string& plain, const std::string& anonymized)
@@ -666,7 +666,7 @@ namespace Util
AnonymizedStrings[plain] = anonymized;
}
- std::string anonymize(const std::string& text)
+ std::string anonymize(const std::string& text, const std::uint64_t nAnonymizationSalt)
{
{
std::unique_lock<std::mutex> lock(AnonymizedMutex);
@@ -679,15 +679,26 @@ namespace Util
}
}
- // We just need something irreversible, short, and
- // quite simple.
- std::size_t hash = 0;
+ // Modified 64-bit FNV-1a to add salting.
+ // For the algorithm and the magic numbers, see http://isthe.com/chongo/tech/comp/fnv/
+ std::uint64_t hash = 0xCBF29CE484222325LL;
+ hash ^= nAnonymizationSalt;
+ hash *= 0x100000001b3ULL;
for (const char c : text)
- hash += c;
+ {
+ hash ^= static_cast<std::uint64_t>(c);
+ hash *= 0x100000001b3ULL;
+ }
+
+ hash ^= nAnonymizationSalt;
+ hash *= 0x100000001b3ULL;
// Generate the anonymized string. The '#' is to hint that it's anonymized.
- // Prepend with salt to make it unique, in case we get collisions (which we will, eventually).
- const std::string res = '#' + Util::encodeId(AnonymizationSalt++, 0) + '#' + Util::encodeId(hash, 0) + '#';
+ // Prepend with count to make it unique within a single process instance,
+ // in case we get collisions (which we will, eventually). N.B.: Identical
+ // strings likely to have different prefixes when logged in WSD process vs. Kit.
+ const std::string res
+ = '#' + Util::encodeId(AnonymizationCounter++, 0) + '#' + Util::encodeId(hash, 0) + '#';
mapAnonymized(text, res);
return res;
}
@@ -702,7 +713,7 @@ namespace Util
return filename;
}
- std::string anonymizeUrl(const std::string& url)
+ std::string anonymizeUrl(const std::string& url, const std::uint64_t nAnonymizationSalt)
{
std::string base;
std::string filename;
@@ -710,7 +721,7 @@ namespace Util
std::string params;
std::tie(base, filename, ext, params) = Util::splitUrl(url);
- return base + Util::anonymize(filename) + ext + params;
+ return base + Util::anonymize(filename, nAnonymizationSalt) + ext + params;
}
}
diff --git a/common/Util.hpp b/common/Util.hpp
index 07af2a6a4..5bf6d9d03 100644
--- a/common/Util.hpp
+++ b/common/Util.hpp
@@ -67,9 +67,9 @@ namespace Util
/// Hex to unsigned char
bool dataFromHexString(const std::string& hexString, std::vector<unsigned char>& data);
/// Encode an integral ID into a string, with padding support.
- std::string encodeId(const unsigned number, const int padding = 5);
+ std::string encodeId(const std::uint64_t number, const int padding = 5);
/// Decode an integral ID from a string.
- unsigned decodeId(const std::string& str);
+ std::uint64_t decodeId(const std::string& str);
bool windowingAvailable();
@@ -713,14 +713,14 @@ int main(int argc, char**argv)
/// Anonymize a sensitive string to avoid leaking it.
/// Called on strings to be logged or exposed.
- std::string anonymize(const std::string& text);
+ std::string anonymize(const std::string& text, const std::uint64_t nAnonymizationSalt);
/// Sets the anonymized version of a given plain-text string.
/// After this, 'anonymize(plain)' will return 'anonymized'.
void mapAnonymized(const std::string& plain, const std::string& anonymized);
/// Anonymize the basename of filenames only, preserving the path and extension.
- std::string anonymizeUrl(const std::string& url);
+ std::string anonymizeUrl(const std::string& url, const std::uint64_t nAnonymizationSalt);
/// Extract and return the filename given a url or path.
std::string getFilenameFromURL(const std::string& url);
diff --git a/kit/Kit.cpp b/kit/Kit.cpp
index 0a91009e3..019a061cb 100644
--- a/kit/Kit.cpp
+++ b/kit/Kit.cpp
@@ -114,6 +114,7 @@ class Document;
static std::shared_ptr<Document> document;
#ifndef BUILDING_TESTS
static bool AnonymizeUserData = false;
+static uint64_t AnonymizationSalt = 82589933;
static std::string ObfuscatedFileId;
#endif
@@ -2278,7 +2279,13 @@ void lokit_main(
LOG_INF("Setting log-level to [trace] and delaying setting to configured [" << LogLevel << "] until after Kit initialization.");
}
- AnonymizeUserData = std::getenv("LOOL_ANONYMIZE_USER_DATA") != nullptr;
+ const char* pAnonymizationSalt = std::getenv("LOOL_ANONYMIZATION_SALT");
+ if (pAnonymizationSalt)
+ {
+ AnonymizationSalt = std::stoull(std::string(pAnonymizationSalt));
+ AnonymizeUserData = true;
+ }
+
LOG_INF("User-data anonymization is " << (AnonymizeUserData ? "enabled." : "disabled."));
assert(!childRoot.empty());
@@ -2587,7 +2594,7 @@ void lokit_main(
std::string anonymizeUrl(const std::string& url)
{
#ifndef BUILDING_TESTS
- return AnonymizeUserData ? Util::anonymizeUrl(url) : url;
+ return AnonymizeUserData ? Util::anonymizeUrl(url, AnonymizationSalt) : url;
#else
return url;
#endif
@@ -2669,7 +2676,7 @@ bool globalPreinit(const std::string &loTemplate)
std::string anonymizeUsername(const std::string& username)
{
#ifndef BUILDING_TESTS
- return AnonymizeUserData ? Util::anonymize(username) : username;
+ return AnonymizeUserData ? Util::anonymize(username, AnonymizationSalt) : username;
#else
return username;
#endif
diff --git a/loolwsd.xml.in b/loolwsd.xml.in
index db99c5c41..cd8075d41 100644
--- a/loolwsd.xml.in
+++ b/loolwsd.xml.in
@@ -52,7 +52,8 @@
<property name="flush" desc="Enable/disable flushing after logging each line. May harm performance. Note that without flushing after each line, the log lines from the different processes will not appear in chronological order.">false</property>
</file>
<anonymize>
- <anonymize_user_data type="bool" desc="Enable to anonymize/obfuscate of user data in logs. If default is true, it was forced at compile-time and cannot be disabled." default="@LOOLWSD_ANONYMIZE_USER_DATA@">@LOOLWSD_ANONYMIZE_USER_DATA@</anonymize_user_data>
+ <anonymize_user_data type="bool" desc="Enable to anonymize/obfuscate of user-data in logs. If default is true, it was forced at compile-time and cannot be disabled." default="@LOOLWSD_ANONYMIZE_USER_DATA@">@LOOLWSD_ANONYMIZE_USER_DATA@</anonymize_user_data>
+ <anonymization_salt type="uint" desc="The salt used to anonymize/obfuscate user-data in logs. Use a secret 64-bit random number." default="82589933">82589933</anonymization_salt>
</anonymize>
</logging>
diff --git a/test/WhiteBoxTests.cpp b/test/WhiteBoxTests.cpp
index 3552af21d..801402e87 100644
--- a/test/WhiteBoxTests.cpp
+++ b/test/WhiteBoxTests.cpp
@@ -675,35 +675,67 @@ void WhiteBoxTests::testJson()
void WhiteBoxTests::testAnonymization()
{
static const std::string name = "some name with space";
- CPPUNIT_ASSERT_EQUAL(std::string("#0#77d#"), Util::anonymizeUrl(name));
+ static const std::string filename = "filename.ext";
+ static const std::string filenameTestx = "testx (6).odt";
+ static const std::string path = "/path/to/filename.ext";
+ static const std::string plainUrl
+ = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/"
+ "736_ocgdpzbkm39u?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&"
+ "permission=edit";
+ static const std::string fileUrl = "http://localhost/owncloud/index.php/apps/richdocuments/"
+ "wopi/files/736_ocgdpzbkm39u/"
+ "secret.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&"
+ "access_token_ttl=0&permission=edit";
+
+ std::uint64_t nAnonymizationSalt = 1111111111182589933;
+
+ CPPUNIT_ASSERT_EQUAL(std::string("#0#5e45aef91248a8aa#"),
+ Util::anonymizeUrl(name, nAnonymizationSalt));
+ CPPUNIT_ASSERT_EQUAL(std::string("#1#8f8d95bd2a202d00#.odt"),
+ Util::anonymizeUrl(filenameTestx, nAnonymizationSalt));
+ CPPUNIT_ASSERT_EQUAL(std::string("/path/to/#2#5c872b2d82ecc8a0#.ext"),
+ Util::anonymizeUrl(path, nAnonymizationSalt));
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/"
+ "#3#22c6f0caad277666#?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_"
+ "token_ttl=0&permission=edit"),
+ Util::anonymizeUrl(plainUrl, nAnonymizationSalt));
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/"
+ "736_ocgdpzbkm39u/"
+ "#4#294f0dfb18f6a80b#.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_"
+ "token_ttl=0&permission=edit"),
+ Util::anonymizeUrl(fileUrl, nAnonymizationSalt));
+
+ nAnonymizationSalt = 0;
+
+ CPPUNIT_ASSERT_EQUAL(std::string("#0#5e45aef91248a8aa#"), Util::anonymizeUrl(name, nAnonymizationSalt));
Util::mapAnonymized(name, name);
- CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name));
+ CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name, nAnonymizationSalt));
- static const std::string filename = "filename.ext";
- CPPUNIT_ASSERT_EQUAL(std::string("#1#341#.ext"), Util::anonymizeUrl(filename));
- Util::mapAnonymized("filename", "filename");
- CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name));
+ CPPUNIT_ASSERT_EQUAL(std::string("#2#5c872b2d82ecc8a0#.ext"),
+ Util::anonymizeUrl(filename, nAnonymizationSalt));
+ Util::mapAnonymized("filename", "filename"); // Identity map of the filename without extension.
+ CPPUNIT_ASSERT_EQUAL(filename, Util::anonymizeUrl(filename, nAnonymizationSalt));
- static const std::string filenameTestx = "testx (6).odt";
- CPPUNIT_ASSERT_EQUAL(std::string("#2#2df#.odt"), Util::anonymizeUrl(filenameTestx));
- Util::mapAnonymized("testx (6)", "testx (6)");
- CPPUNIT_ASSERT_EQUAL(filenameTestx, Util::anonymizeUrl(filenameTestx));
+ CPPUNIT_ASSERT_EQUAL(std::string("#1#8f8d95bd2a202d00#.odt"),
+ Util::anonymizeUrl(filenameTestx, nAnonymizationSalt));
+ Util::mapAnonymized("testx (6)",
+ "testx (6)"); // Identity map of the filename without extension.
+ CPPUNIT_ASSERT_EQUAL(filenameTestx, Util::anonymizeUrl(filenameTestx, nAnonymizationSalt));
- static const std::string path = "/path/to/filename.ext";
- CPPUNIT_ASSERT_EQUAL(path, Util::anonymizeUrl(path));
+ CPPUNIT_ASSERT_EQUAL(path, Util::anonymizeUrl(path, nAnonymizationSalt));
- static const std::string plainUrl = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/736_ocgdpzbkm39u?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&permission=edit";
- const std::string urlAnonymized = Util::replace(plainUrl, "736_ocgdpzbkm39u", "#3#5a1#");
- CPPUNIT_ASSERT_EQUAL(urlAnonymized, Util::anonymizeUrl(plainUrl));
+ const std::string urlAnonymized = Util::replace(plainUrl, "736_ocgdpzbkm39u", "#3#22c6f0caad277666#");
+ CPPUNIT_ASSERT_EQUAL(urlAnonymized, Util::anonymizeUrl(plainUrl, nAnonymizationSalt));
Util::mapAnonymized("736_ocgdpzbkm39u", "736_ocgdpzbkm39u");
- CPPUNIT_ASSERT_EQUAL(plainUrl, Util::anonymizeUrl(plainUrl));
+ CPPUNIT_ASSERT_EQUAL(plainUrl, Util::anonymizeUrl(plainUrl, nAnonymizationSalt));
- static const std::string fileUrl = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/736_ocgdpzbkm39u/secret.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&permission=edit";
- const std::string urlAnonymized2 = Util::replace(fileUrl, "secret", "#4#286#");
- CPPUNIT_ASSERT_EQUAL(urlAnonymized2, Util::anonymizeUrl(fileUrl));
+ const std::string urlAnonymized2 = Util::replace(fileUrl, "secret", "#4#294f0dfb18f6a80b#");
+ CPPUNIT_ASSERT_EQUAL(urlAnonymized2, Util::anonymizeUrl(fileUrl, nAnonymizationSalt));
Util::mapAnonymized("secret", "736_ocgdpzbkm39u");
const std::string urlAnonymized3 = Util::replace(fileUrl, "secret", "736_ocgdpzbkm39u");
- CPPUNIT_ASSERT_EQUAL(urlAnonymized3, Util::anonymizeUrl(fileUrl));
+ CPPUNIT_ASSERT_EQUAL(urlAnonymized3, Util::anonymizeUrl(fileUrl, nAnonymizationSalt));
}
CPPUNIT_TEST_SUITE_REGISTRATION(WhiteBoxTests);
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index e3d95e235..574b54bd1 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -687,6 +687,7 @@ std::string LOOLWSD::ConfigFile = LOOLWSD_CONFIGDIR "/loolwsd.xml";
std::string LOOLWSD::ConfigDir = LOOLWSD_CONFIGDIR "/conf.d";
std::string LOOLWSD::LogLevel = "trace";
bool LOOLWSD::AnonymizeUserData = false;
+std::uint64_t LOOLWSD::AnonymizationSalt = 82589933;
Util::RuntimeConstant<bool> LOOLWSD::SSLEnabled;
Util::RuntimeConstant<bool> LOOLWSD::SSLTermination;
unsigned LOOLWSD::MaxConnections;
@@ -965,7 +966,12 @@ void LOOLWSD::initialize(Application& self)
LOG_INF("Anonymization of user-data is " << (AnonymizeUserData ? "enabled." : "disabled."));
if (AnonymizeUserData)
- setenv("LOOL_ANONYMIZE_USER_DATA", "1", true);
+ {
+ // Get the salt, if set, otherwise default, and set as envar, so the kits inherit it.
+ AnonymizationSalt = getConfigValue<std::uint64_t>(conf, "logging.anonymize.anonymization_salt", 82589933);
+ const std::string sAnonymizationSalt = std::to_string(AnonymizationSalt);
+ setenv("LOOL_ANONYMIZATION_SALT", sAnonymizationSalt.c_str(), true);
+ }
{
std::string proto = getConfigValue<std::string>(conf, "net.proto", "");
diff --git a/wsd/LOOLWSD.hpp b/wsd/LOOLWSD.hpp
index 19f783e60..cadc0e66f 100644
--- a/wsd/LOOLWSD.hpp
+++ b/wsd/LOOLWSD.hpp
@@ -65,6 +65,7 @@ public:
static std::string LOKitVersion;
static std::string LogLevel;
static bool AnonymizeUserData;
+ static std::uint64_t AnonymizationSalt;
static std::atomic<unsigned> NumConnections;
static bool TileCachePersistent;
static std::unique_ptr<TraceFileWriter> TraceDumper;
@@ -141,14 +142,14 @@ public:
/// Anonymize the basename of filenames, preserving the path and extension.
static std::string anonymizeUrl(const std::string& url)
{
- return AnonymizeUserData ? Util::anonymizeUrl(url) : url;
+ return AnonymizeUserData ? Util::anonymizeUrl(url, AnonymizationSalt) : url;
}
/// Anonymize user names and IDs.
/// Will use the Obfuscated User ID if one is provied via WOPI.
static std::string anonymizeUsername(const std::string& username)
{
- return AnonymizeUserData ? Util::anonymize(username) : username;
+ return AnonymizeUserData ? Util::anonymize(username, AnonymizationSalt) : username;
}
int innerMain();
@@ -184,6 +185,7 @@ private:
void operator()(int& value) { value = _config.getInt(_name); }
void operator()(unsigned int& value) { value = _config.getUInt(_name); }
+ void operator()(unsigned long& value) { value = _config.getUInt64(_name); }
void operator()(bool& value) { value = _config.getBool(_name); }
void operator()(std::string& value) { value = _config.getString(_name); }
void operator()(double& value) { value = _config.getDouble(_name); }
More information about the Libreoffice-commits
mailing list