[Libreoffice-commits] online.git: wsd/LOOLWSD.cpp wsd/LOOLWSD.hpp

Ashod Nakashian ashod.nakashian at collabora.co.uk
Mon Jan 16 02:17:28 UTC 2017


 wsd/LOOLWSD.cpp |  156 ++++++++++++++++++++++++++++++++------------------------
 wsd/LOOLWSD.hpp |    4 +
 2 files changed, 94 insertions(+), 66 deletions(-)

New commits:
commit 63dd8fca9b117f502e2145619dfc71342f822f32
Author: Ashod Nakashian <ashod.nakashian at collabora.co.uk>
Date:   Sun Jan 15 20:12:10 2017 -0500

    wsd: improved forkit crash recovery
    
    Refactored the forkit process wait and
    re-fork into separate function.
    
    Change-Id: If6106ea3820d10b4448bb27740d757afcea4779f
    Reviewed-on: https://gerrit.libreoffice.org/33137
    Reviewed-by: Ashod Nakashian <ashnakash at gmail.com>
    Tested-by: Ashod Nakashian <ashnakash at gmail.com>

diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index ac89bdd..c9f9e7c 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -330,6 +330,8 @@ static bool forkChildren(const int number)
             LastForkRequestTime = std::chrono::steady_clock::now();
             return true;
         }
+
+        LOG_ERR("No forkit pipe while rebalancing children.");
     }
 
     return false;
@@ -476,6 +478,7 @@ static std::shared_ptr<ChildProcess> getNewChild()
         if (!rebalanceChildren(numPreSpawn))
         {
             // Fatal. Let's fail and retry at a higher level.
+            LOG_DBG("getNewChild: rebalancing of children failed.");
             return nullptr;
         }
 
@@ -869,6 +872,7 @@ private:
                 if (retry > 0)
                 {
                     LOG_WRN("Failed to connect DocBroker and Client Session, retrying.");
+                    LOOLWSD::checkAndRestoreForKit();
                 }
                 else
                 {
@@ -2076,14 +2080,80 @@ void LOOLWSD::displayHelp()
     helpFormatter.format(std::cout);
 }
 
+bool LOOLWSD::checkAndRestoreForKit()
+{
+    int status;
+    const pid_t pid = waitpid(ForKitProcId, &status, WUNTRACED | WNOHANG);
+    if (pid > 0)
+    {
+        if (pid == ForKitProcId)
+        {
+            if (WIFEXITED(status) || WIFSIGNALED(status))
+            {
+                if (WIFEXITED(status))
+                {
+                    LOG_INF("Forkit process [" << pid << "] exited with code: " <<
+                            WEXITSTATUS(status) << ".");
+                }
+                else
+                {
+                    LOG_ERR("Forkit process [" << pid << "] " <<
+                            (WCOREDUMP(status) ? "core-dumped" : "died") <<
+                            " with " << SigUtil::signalName(WTERMSIG(status)));
+                }
+
+                // Spawn a new forkit and try to dust it off and resume.
+                if (!createForKit())
+                {
+                    LOG_FTL("Failed to spawn forkit instance. Shutting down.");
+                    SigUtil::requestShutdown();
+                }
+            }
+            else if (WIFSTOPPED(status))
+            {
+                LOG_INF("Forkit process [" << pid << "] stopped with " <<
+                        SigUtil::signalName(WSTOPSIG(status)));
+            }
+            else if (WIFCONTINUED(status))
+            {
+                LOG_INF("Forkit process [" << pid << "] resumed with SIGCONT.");
+            }
+            else
+            {
+                LOG_WRN("Unknown status returned by waitpid: " << std::hex << status << ".");
+            }
+
+            return true;
+        }
+        else
+        {
+            LOG_ERR("An unknown child process [" << pid << "] died.");
+        }
+    }
+    else if (pid < 0)
+    {
+        LOG_SYS("Forkit waitpid failed.");
+        if (errno == ECHILD)
+        {
+            // No child processes.
+            // Spawn a new forkit and try to dust it off and resume.
+            if (!createForKit())
+            {
+                LOG_FTL("Failed to spawn forkit instance. Shutting down.");
+                SigUtil::requestShutdown();
+            }
+        }
+
+        return true;
+    }
+
+    return false;
+}
+
 bool LOOLWSD::createForKit()
 {
     LOG_INF("Creating new forkit process.");
 
-    const int oldForKitWritePipe = ForKitWritePipe;
-    ForKitWritePipe = -1;
-    close(oldForKitWritePipe);
-
     Process::Args args;
     args.push_back("--losubpath=" + std::string(LO_JAIL_SUBPATH));
     args.push_back("--systemplate=" + SysTemplate);
@@ -2092,9 +2162,14 @@ bool LOOLWSD::createForKit()
     args.push_back("--clientport=" + std::to_string(ClientPortNumber));
     args.push_back("--masterport=" + std::to_string(MasterPortNumber));
     if (UnitWSD::get().hasKitHooks())
+    {
         args.push_back("--unitlib=" + UnitTestLibrary);
+    }
+
     if (DisplayVersion)
+    {
         args.push_back("--version");
+    }
 
     std::string forKitPath = Path(Application::instance().commandPath()).parent().toString() + "loolforkit";
     if (NoCapsForKit)
@@ -2107,6 +2182,15 @@ bool LOOLWSD::createForKit()
     std::unique_lock<std::mutex> docBrokersLock(DocBrokersMutex);
     std::unique_lock<std::mutex> newChildrenLock(NewChildrenMutex);
 
+    // Always reap first, in case we haven't done so yet.
+    int status;
+    waitpid(ForKitProcId, &status, WUNTRACED | WNOHANG);
+    ForKitProcId = -1;
+    Admin::instance().setForKitPid(ForKitProcId);
+
+    const int oldForKitWritePipe = ForKitWritePipe;
+    ForKitWritePipe = -1;
+    close(oldForKitWritePipe);
 
     // ForKit always spawns one.
     ++OutstandingForks;
@@ -2268,69 +2352,9 @@ int LOOLWSD::main(const std::vector<std::string>& /*args*/)
             break;
         }
 
-        const pid_t pid = waitpid(ForKitProcId, &status, WUNTRACED | WNOHANG);
-        if (pid > 0)
-        {
-            if (ForKitProcId == pid)
-            {
-                if (WIFEXITED(status) || WIFSIGNALED(status))
-                {
-                    if (WIFEXITED(status))
-                    {
-                        LOG_INF("Forkit process [" << pid << "] exited with code: " <<
-                                WEXITSTATUS(status) << ".");
-                    }
-                    else
-                    {
-                        LOG_ERR("Forkit process [" << pid << "] " <<
-                                (WCOREDUMP(status) ? "core-dumped" : "died") <<
-                                " with " << SigUtil::signalName(WTERMSIG(status)));
-                    }
-
-                    // Spawn a new forkit and try to dust it off and resume.
-                    if (!createForKit())
-                    {
-                        LOG_FTL("Failed to spawn forkit instance. Shutting down.");
-                        SigUtil::requestShutdown();
-                        break;
-                    }
-                }
-                else if (WIFSTOPPED(status))
-                {
-                    LOG_INF("Forkit process [" << pid << "] stopped with " <<
-                            SigUtil::signalName(WSTOPSIG(status)));
-                }
-                else if (WIFCONTINUED(status))
-                {
-                    LOG_INF("Forkit process [" << pid << "] resumed with SIGCONT.");
-                }
-                else
-                {
-                    LOG_WRN("Unknown status returned by waitpid: " << std::hex << status << ".");
-                }
-            }
-            else
-            {
-                LOG_ERR("An unknown child process [" << pid << "] died.");
-            }
-        }
-        else if (pid < 0)
-        {
-            LOG_SYS("Forkit waitpid failed.");
-            if (errno == ECHILD)
-            {
-                // No child processes.
-                // Spawn a new forkit and try to dust it off and resume.
-                if (!createForKit())
-                {
-                    LOG_FTL("Failed to spawn forkit instance. Shutting down.");
-                    SigUtil::requestShutdown();
-                    break;
-                }
-            }
-        }
-        else // pid == 0, no children have died
+        if (!checkAndRestoreForKit())
         {
+            // No children have died.
             // Make sure we have sufficient reserves.
             if (prespawnChildren())
             {
diff --git a/wsd/LOOLWSD.hpp b/wsd/LOOLWSD.hpp
index f802981..fa18f25 100644
--- a/wsd/LOOLWSD.hpp
+++ b/wsd/LOOLWSD.hpp
@@ -69,6 +69,10 @@ public:
 
     static void dumpOutgoingTrace(const std::string& id, const std::string& sessionId, const std::string& data);
 
+    /// Waits on Forkit and reaps if it dies, then restores.
+    /// Return true if wait succeeds.
+    static bool checkAndRestoreForKit();
+
     /// Creates a new instance of Forkit.
     /// Return true when successfull.
     static bool createForKit();


More information about the Libreoffice-commits mailing list