[Libreoffice-commits] online.git: wsd/LOOLWSD.cpp wsd/LOOLWSD.hpp
Ashod Nakashian
ashod.nakashian at collabora.co.uk
Mon Jan 16 02:17:28 UTC 2017
wsd/LOOLWSD.cpp | 156 ++++++++++++++++++++++++++++++++------------------------
wsd/LOOLWSD.hpp | 4 +
2 files changed, 94 insertions(+), 66 deletions(-)
New commits:
commit 63dd8fca9b117f502e2145619dfc71342f822f32
Author: Ashod Nakashian <ashod.nakashian at collabora.co.uk>
Date: Sun Jan 15 20:12:10 2017 -0500
wsd: improved forkit crash recovery
Refactored the forkit process wait and
re-fork into separate function.
Change-Id: If6106ea3820d10b4448bb27740d757afcea4779f
Reviewed-on: https://gerrit.libreoffice.org/33137
Reviewed-by: Ashod Nakashian <ashnakash at gmail.com>
Tested-by: Ashod Nakashian <ashnakash at gmail.com>
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index ac89bdd..c9f9e7c 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -330,6 +330,8 @@ static bool forkChildren(const int number)
LastForkRequestTime = std::chrono::steady_clock::now();
return true;
}
+
+ LOG_ERR("No forkit pipe while rebalancing children.");
}
return false;
@@ -476,6 +478,7 @@ static std::shared_ptr<ChildProcess> getNewChild()
if (!rebalanceChildren(numPreSpawn))
{
// Fatal. Let's fail and retry at a higher level.
+ LOG_DBG("getNewChild: rebalancing of children failed.");
return nullptr;
}
@@ -869,6 +872,7 @@ private:
if (retry > 0)
{
LOG_WRN("Failed to connect DocBroker and Client Session, retrying.");
+ LOOLWSD::checkAndRestoreForKit();
}
else
{
@@ -2076,14 +2080,80 @@ void LOOLWSD::displayHelp()
helpFormatter.format(std::cout);
}
+bool LOOLWSD::checkAndRestoreForKit()
+{
+ int status;
+ const pid_t pid = waitpid(ForKitProcId, &status, WUNTRACED | WNOHANG);
+ if (pid > 0)
+ {
+ if (pid == ForKitProcId)
+ {
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ {
+ if (WIFEXITED(status))
+ {
+ LOG_INF("Forkit process [" << pid << "] exited with code: " <<
+ WEXITSTATUS(status) << ".");
+ }
+ else
+ {
+ LOG_ERR("Forkit process [" << pid << "] " <<
+ (WCOREDUMP(status) ? "core-dumped" : "died") <<
+ " with " << SigUtil::signalName(WTERMSIG(status)));
+ }
+
+ // Spawn a new forkit and try to dust it off and resume.
+ if (!createForKit())
+ {
+ LOG_FTL("Failed to spawn forkit instance. Shutting down.");
+ SigUtil::requestShutdown();
+ }
+ }
+ else if (WIFSTOPPED(status))
+ {
+ LOG_INF("Forkit process [" << pid << "] stopped with " <<
+ SigUtil::signalName(WSTOPSIG(status)));
+ }
+ else if (WIFCONTINUED(status))
+ {
+ LOG_INF("Forkit process [" << pid << "] resumed with SIGCONT.");
+ }
+ else
+ {
+ LOG_WRN("Unknown status returned by waitpid: " << std::hex << status << ".");
+ }
+
+ return true;
+ }
+ else
+ {
+ LOG_ERR("An unknown child process [" << pid << "] died.");
+ }
+ }
+ else if (pid < 0)
+ {
+ LOG_SYS("Forkit waitpid failed.");
+ if (errno == ECHILD)
+ {
+ // No child processes.
+ // Spawn a new forkit and try to dust it off and resume.
+ if (!createForKit())
+ {
+ LOG_FTL("Failed to spawn forkit instance. Shutting down.");
+ SigUtil::requestShutdown();
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
bool LOOLWSD::createForKit()
{
LOG_INF("Creating new forkit process.");
- const int oldForKitWritePipe = ForKitWritePipe;
- ForKitWritePipe = -1;
- close(oldForKitWritePipe);
-
Process::Args args;
args.push_back("--losubpath=" + std::string(LO_JAIL_SUBPATH));
args.push_back("--systemplate=" + SysTemplate);
@@ -2092,9 +2162,14 @@ bool LOOLWSD::createForKit()
args.push_back("--clientport=" + std::to_string(ClientPortNumber));
args.push_back("--masterport=" + std::to_string(MasterPortNumber));
if (UnitWSD::get().hasKitHooks())
+ {
args.push_back("--unitlib=" + UnitTestLibrary);
+ }
+
if (DisplayVersion)
+ {
args.push_back("--version");
+ }
std::string forKitPath = Path(Application::instance().commandPath()).parent().toString() + "loolforkit";
if (NoCapsForKit)
@@ -2107,6 +2182,15 @@ bool LOOLWSD::createForKit()
std::unique_lock<std::mutex> docBrokersLock(DocBrokersMutex);
std::unique_lock<std::mutex> newChildrenLock(NewChildrenMutex);
+ // Always reap first, in case we haven't done so yet.
+ int status;
+ waitpid(ForKitProcId, &status, WUNTRACED | WNOHANG);
+ ForKitProcId = -1;
+ Admin::instance().setForKitPid(ForKitProcId);
+
+ const int oldForKitWritePipe = ForKitWritePipe;
+ ForKitWritePipe = -1;
+ close(oldForKitWritePipe);
// ForKit always spawns one.
++OutstandingForks;
@@ -2268,69 +2352,9 @@ int LOOLWSD::main(const std::vector<std::string>& /*args*/)
break;
}
- const pid_t pid = waitpid(ForKitProcId, &status, WUNTRACED | WNOHANG);
- if (pid > 0)
- {
- if (ForKitProcId == pid)
- {
- if (WIFEXITED(status) || WIFSIGNALED(status))
- {
- if (WIFEXITED(status))
- {
- LOG_INF("Forkit process [" << pid << "] exited with code: " <<
- WEXITSTATUS(status) << ".");
- }
- else
- {
- LOG_ERR("Forkit process [" << pid << "] " <<
- (WCOREDUMP(status) ? "core-dumped" : "died") <<
- " with " << SigUtil::signalName(WTERMSIG(status)));
- }
-
- // Spawn a new forkit and try to dust it off and resume.
- if (!createForKit())
- {
- LOG_FTL("Failed to spawn forkit instance. Shutting down.");
- SigUtil::requestShutdown();
- break;
- }
- }
- else if (WIFSTOPPED(status))
- {
- LOG_INF("Forkit process [" << pid << "] stopped with " <<
- SigUtil::signalName(WSTOPSIG(status)));
- }
- else if (WIFCONTINUED(status))
- {
- LOG_INF("Forkit process [" << pid << "] resumed with SIGCONT.");
- }
- else
- {
- LOG_WRN("Unknown status returned by waitpid: " << std::hex << status << ".");
- }
- }
- else
- {
- LOG_ERR("An unknown child process [" << pid << "] died.");
- }
- }
- else if (pid < 0)
- {
- LOG_SYS("Forkit waitpid failed.");
- if (errno == ECHILD)
- {
- // No child processes.
- // Spawn a new forkit and try to dust it off and resume.
- if (!createForKit())
- {
- LOG_FTL("Failed to spawn forkit instance. Shutting down.");
- SigUtil::requestShutdown();
- break;
- }
- }
- }
- else // pid == 0, no children have died
+ if (!checkAndRestoreForKit())
{
+ // No children have died.
// Make sure we have sufficient reserves.
if (prespawnChildren())
{
diff --git a/wsd/LOOLWSD.hpp b/wsd/LOOLWSD.hpp
index f802981..fa18f25 100644
--- a/wsd/LOOLWSD.hpp
+++ b/wsd/LOOLWSD.hpp
@@ -69,6 +69,10 @@ public:
static void dumpOutgoingTrace(const std::string& id, const std::string& sessionId, const std::string& data);
+ /// Waits on Forkit and reaps if it dies, then restores.
+ /// Return true if wait succeeds.
+ static bool checkAndRestoreForKit();
+
/// Creates a new instance of Forkit.
/// Return true when successfull.
static bool createForKit();
More information about the Libreoffice-commits
mailing list