[Libreoffice-commits] online.git: Branch 'distro/collabora/collabora-online-4-0' - common/Common.hpp wsd/DocumentBroker.cpp wsd/LOOLWSD.cpp
Ashod Nakashian (via logerrit)
logerrit at kemper.freedesktop.org
Tue Oct 22 17:01:21 UTC 2019
common/Common.hpp | 2 +-
wsd/DocumentBroker.cpp | 7 ++++++-
wsd/LOOLWSD.cpp | 16 ++++++++++++----
3 files changed, 19 insertions(+), 6 deletions(-)
New commits:
commit 32fa1d95fc2ec65866d0cb47d619885182db7040
Author: Ashod Nakashian <ashod.nakashian at collabora.co.uk>
AuthorDate: Fri Oct 18 08:10:12 2019 -0400
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Tue Oct 22 19:01:03 2019 +0200
wsd: dynamic child timeout
The initial child spawning takes significantly longer
than subsequent ones (for obvious reasons) and this
led to unit-tests being sensitive to the timeout
we use for child spawning. Too short, and we
spawn more than we want on startup, too long
and crash-recovery tests fail (we don't
recover fast enough, as we wait too long before
spawning new children).
Dynamically setting the timeout allows us to give
a longer timeout at startup, and reduce it afterwards.
Change-Id: I8423f5c6619e57030ab43d519aaa41d8712c36d3
Reviewed-on: https://gerrit.libreoffice.org/81194
Reviewed-by: Andras Timar <andras.timar at collabora.com>
Tested-by: Andras Timar <andras.timar at collabora.com>
diff --git a/common/Common.hpp b/common/Common.hpp
index 599de0779..84b5eaae2 100644
--- a/common/Common.hpp
+++ b/common/Common.hpp
@@ -15,7 +15,7 @@ constexpr int DEFAULT_CLIENT_PORT_NUMBER = 9980;
constexpr int DEFAULT_MASTER_PORT_NUMBER = 9981;
constexpr int COMMAND_TIMEOUT_MS = 5000;
-constexpr int CHILD_TIMEOUT_MS = COMMAND_TIMEOUT_MS * 2;
+constexpr int CHILD_TIMEOUT_MS = COMMAND_TIMEOUT_MS;
constexpr int CHILD_REBALANCE_INTERVAL_MS = CHILD_TIMEOUT_MS / 10;
constexpr int POLL_TIMEOUT_MS = COMMAND_TIMEOUT_MS / 5;
constexpr int WS_SEND_TIMEOUT_MS = 1000;
diff --git a/wsd/DocumentBroker.cpp b/wsd/DocumentBroker.cpp
index 657ea999b..0c423bc52 100644
--- a/wsd/DocumentBroker.cpp
+++ b/wsd/DocumentBroker.cpp
@@ -373,7 +373,12 @@ void DocumentBroker::pollThread()
}
// Flush socket data first.
- const int flushTimeoutMs = POLL_TIMEOUT_MS * 2; // ~1000ms
+ constexpr int flushTimeoutMs = POLL_TIMEOUT_MS * 2; // ~1000ms
+ LOG_INF("Flushing socket for doc ["
+ << _docKey << "] for " << flushTimeoutMs << " ms. stop: " << _stop
+ << ", continuePolling: " << _poll->continuePolling() << ", ShutdownRequestFlag: "
+ << ShutdownRequestFlag << ", TerminationFlag: " << TerminationFlag
+ << ". Terminating child with reason: [" << _closeReason << "].");
const auto flushStartTime = std::chrono::steady_clock::now();
while (_poll->getSocketCount())
{
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index 5b8f43949..af101523c 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -206,6 +206,8 @@ extern "C" { void dump_state(void); /* easy for gdb */ }
static int careerSpanMs = 0;
#endif
+/// The timeout for a child to spawn, initially high, then reset to the default.
+int ChildSpawnTimeoutMs = CHILD_TIMEOUT_MS * 4;
bool LOOLWSD::NoCapsForKit = false;
bool LOOLWSD::TileCachePersistent = true;
std::atomic<unsigned> LOOLWSD::NumConnections;
@@ -422,7 +424,7 @@ static int rebalanceChildren(int balance)
const auto duration = (std::chrono::steady_clock::now() - LastForkRequestTime);
const std::chrono::milliseconds::rep durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
- if (OutstandingForks != 0 && durationMs >= CHILD_TIMEOUT_MS)
+ if (OutstandingForks != 0 && durationMs >= ChildSpawnTimeoutMs)
{
// Children taking too long to spawn.
// Forget we had requested any, and request anew.
@@ -504,7 +506,7 @@ std::shared_ptr<ChildProcess> getNewChild_Blocks(
}
// With valgrind we need extended time to spawn kits.
- const size_t timeoutMs = CHILD_TIMEOUT_MS / 2;
+ const size_t timeoutMs = ChildSpawnTimeoutMs / 2;
LOG_TRC("Waiting for a new child for a max of " << timeoutMs << " ms.");
const auto timeout = std::chrono::milliseconds(timeoutMs);
#else
@@ -1642,7 +1644,10 @@ bool LOOLWSD::createForKit()
Admin::instance().setForKitPid(ForKitProcId);
Admin::instance().setForKitWritePipe(ForKitWritePipe);
- rebalanceChildren(LOOLWSD::NumPreSpawnedChildren - 1);
+ const int balance = LOOLWSD::NumPreSpawnedChildren - OutstandingForks;
+ if (balance > 0)
+ rebalanceChildren(balance);
+
return ForKitProcId != -1;
#endif
}
@@ -3191,7 +3196,7 @@ int LOOLWSD::innerMain()
}
else
{
- const int timeoutMs = CHILD_TIMEOUT_MS * (LOOLWSD::NoCapsForKit ? 150 : 50);
+ const int timeoutMs = ChildSpawnTimeoutMs * (LOOLWSD::NoCapsForKit ? 150 : 50);
const auto timeout = std::chrono::milliseconds(timeoutMs);
LOG_TRC("Waiting for a new child for a max of " << timeoutMs << " ms.");
if (!NewChildrenCV.wait_for(lock, timeout, []() { return !NewChildren.empty(); }))
@@ -3229,6 +3234,9 @@ int LOOLWSD::innerMain()
std::cerr << "Ready to accept connections on port " << ClientPortNumber << ".\n" << std::endl;
#endif
+ // Reset the child-spawn timeout to the default, now that we're set.
+ ChildSpawnTimeoutMs = CHILD_TIMEOUT_MS;
+
const auto startStamp = std::chrono::steady_clock::now();
while (!TerminationFlag && !ShutdownRequestFlag)
More information about the Libreoffice-commits
mailing list