[Libreoffice-commits] online.git: common/SigUtil.cpp kit/ForKit.cpp wsd/Admin.cpp wsd/Admin.hpp wsd/AdminModel.cpp wsd/AdminModel.hpp wsd/LOOLWSD.cpp

Gabriel Masei (via logerrit) logerrit at kemper.freedesktop.org
Wed Apr 8 13:06:57 UTC 2020


 common/SigUtil.cpp |    2 +-
 kit/ForKit.cpp     |   36 ++++++++++++++++++++++++++++++++++--
 wsd/Admin.cpp      |    5 +++++
 wsd/Admin.hpp      |    1 +
 wsd/AdminModel.cpp |    6 ++++++
 wsd/AdminModel.hpp |    4 ++++
 wsd/LOOLWSD.cpp    |   22 +++++++++++++++++++---
 7 files changed, 70 insertions(+), 6 deletions(-)

New commits:
commit 0ac330f599b8daaeae5ba057fddb6994fd078e73
Author:     Gabriel Masei <gabriel.masei at 1and1.ro>
AuthorDate: Wed Apr 8 00:21:17 2020 +0300
Commit:     Michael Meeks <michael.meeks at collabora.com>
CommitDate: Wed Apr 8 15:06:39 2020 +0200

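    Add segmentation fault metric for Kit processes
    
    ForKit now counts reaped Kit children that were terminated by SIGSEGV
    or SIGBUS and reports the total to WSD with a 'segfaultcount' message
    (or calls Admin directly when built with KIT_IN_PROCESS). WSD adds it
    to a running total exposed as the 'kit_segfault_count' metric. The
    signal handler in SigUtil now re-raises the signal with ::raise()
    instead of kill(getpid(), ...).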
    
    Change-Id: Ifb0de004274213ef512f601e4419f98f456c7288
    Reviewed-on: https://gerrit.libreoffice.org/c/online/+/91857
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice at gmail.com>
    Reviewed-by: Michael Meeks <michael.meeks at collabora.com>

diff --git a/common/SigUtil.cpp b/common/SigUtil.cpp
index 944a8d376..cd4c779dc 100644
--- a/common/SigUtil.cpp
+++ b/common/SigUtil.cpp
@@ -260,7 +260,7 @@ namespace SigUtil
             dumpBacktrace();
 
         // let default handler process the signal
-        kill(getpid(), signal);
+        ::raise(signal);
     }
 
     void dumpBacktrace()
diff --git a/kit/ForKit.cpp b/kit/ForKit.cpp
index 8b18ef34e..bea72f4d5 100644
--- a/kit/ForKit.cpp
+++ b/kit/ForKit.cpp
@@ -36,6 +36,9 @@
 #include <Unit.hpp>
 #include <Util.hpp>
 #include <WebSocketHandler.hpp>
+#if !MOBILEAPP
+#include <Admin.hpp>
+#endif
 
 #include <common/FileUtil.hpp>
 #include <common/Seccomp.hpp>
@@ -226,7 +229,7 @@ static void cleanupChildren()
 {
     std::vector<std::string> jails;
     Process::PID exitedChildPid;
-    int status;
+    int status, segFaultCount = 0;
 
     // Reap quickly without doing slow cleanup so WSD can spawn more rapidly.
     while ((exitedChildPid = waitpid(-1, &status, WUNTRACED | WNOHANG)) > 0)
@@ -242,13 +245,42 @@ static void cleanupChildren()
                 // We ran out of kits and we aren't terminating.
                 LOG_WRN("No live Kits exist, and we are not terminating yet.");
             }
+
+            if (WIFSIGNALED(status) && (WTERMSIG(status) == SIGSEGV || WTERMSIG(status) == SIGBUS))
+            {
+                segFaultCount ++;
+            }
         }
         else
         {
             LOG_ERR("Unknown child " << exitedChildPid << " has exited");
         }
     }
-    
+
+    if (segFaultCount)
+    {
+#ifdef KIT_IN_PROCESS
+#if !MOBILEAPP
+        Admin::instance().addSegFaultCount(segFaultCount);
+#endif
+#else
+        if (WSHandler)
+        {
+            std::stringstream stream;
+            stream << "segfaultcount " << segFaultCount << "\n";
+            int ret = WSHandler->sendMessage(stream.str());
+            if (ret == -1)
+            {
+                LOG_WRN("Could not send 'segfaultcount' message through websocket");
+            }
+            else
+            {
+                LOG_WRN("Successfully sent 'segfaultcount' message " << stream.str());
+            }
+        }
+#endif
+    }
+
     // Now delete the jails.
     for (const auto& path : jails)
     {
diff --git a/wsd/Admin.cpp b/wsd/Admin.cpp
index 88b9268df..3fc0b6955 100644
--- a/wsd/Admin.cpp
+++ b/wsd/Admin.cpp
@@ -593,6 +593,11 @@ void Admin::setDocWopiUploadDuration(const std::string& docKey, const std::chron
     addCallback([=]{ _model.setDocWopiUploadDuration(docKey, uploadDuration); });
 }
 
+void Admin::addSegFaultCount(unsigned segFaultCount)
+{
+    addCallback([=]{ _model.addSegFaultCount(segFaultCount); });
+}
+
 void Admin::notifyForkit()
 {
     std::ostringstream oss;
diff --git a/wsd/Admin.hpp b/wsd/Admin.hpp
index e39d89fe7..6287d38bc 100644
--- a/wsd/Admin.hpp
+++ b/wsd/Admin.hpp
@@ -129,6 +129,7 @@ public:
     void setViewLoadDuration(const std::string& docKey, const std::string& sessionId, std::chrono::milliseconds viewLoadDuration);
     void setDocWopiDownloadDuration(const std::string& docKey, std::chrono::milliseconds wopiDownloadDuration);
     void setDocWopiUploadDuration(const std::string& docKey, const std::chrono::milliseconds uploadDuration);
+    void addSegFaultCount(unsigned segFaultCount);
 
     void getMetrics(std::ostringstream &metrics);
 
diff --git a/wsd/AdminModel.cpp b/wsd/AdminModel.cpp
index 22e402a83..f19d6e367 100644
--- a/wsd/AdminModel.cpp
+++ b/wsd/AdminModel.cpp
@@ -771,6 +771,11 @@ void AdminModel::setDocWopiUploadDuration(const std::string& docKey, const std::
         it->second.setWopiUploadDuration(wopiUploadDuration);
 }
 
+void AdminModel::addSegFaultCount(unsigned segFaultCount)
+{
+    _segFaultCount += segFaultCount;
+}
+
 int filterNumberName(const struct dirent *dir)
 {
     return !fnmatch("[0-9]*", dir->d_name, 0);
@@ -993,6 +998,7 @@ void AdminModel::getMetrics(std::ostringstream &oss)
     oss << "kit_count " << kitStats.unassignedCount + kitStats.assignedCount << std::endl;
     oss << "kit_unassigned_count " << kitStats.unassignedCount << std::endl;
     oss << "kit_assigned_count " << kitStats.assignedCount << std::endl;
+    oss << "kit_segfault_count " << _segFaultCount << std::endl;
     PrintKitAggregateMetrics(oss, "thread_count", "", kitStats._threadCount);
     PrintKitAggregateMetrics(oss, "memory_used", "bytes", docStats._kitUsedMemory._all);
     PrintKitAggregateMetrics(oss, "cpu_time", "seconds", kitStats._cpuTime);
diff --git a/wsd/AdminModel.hpp b/wsd/AdminModel.hpp
index be5d9dd0f..b097289ef 100644
--- a/wsd/AdminModel.hpp
+++ b/wsd/AdminModel.hpp
@@ -249,6 +249,7 @@ class AdminModel
 public:
 
     AdminModel() :
+        _segFaultCount(0),
         _owner(std::this_thread::get_id())
     {
         LOG_INF("AdminModel ctor.");
@@ -316,6 +317,7 @@ public:
     void setViewLoadDuration(const std::string& docKey, const std::string& sessionId, std::chrono::milliseconds viewLoadDuration);
     void setDocWopiDownloadDuration(const std::string& docKey, std::chrono::milliseconds wopiDownloadDuration);
     void setDocWopiUploadDuration(const std::string& docKey, const std::chrono::milliseconds wopiUploadDuration);
+    void addSegFaultCount(unsigned segFaultCount);
     void setForKitPid(pid_t pid) { _forKitPid = pid; }
 
     void getMetrics(std::ostringstream &oss);
@@ -360,6 +362,8 @@ private:
     uint64_t _sentBytesTotal;
     uint64_t _recvBytesTotal;
 
+    uint64_t _segFaultCount;
+
     pid_t _forKitPid;
 
     /// We check the owner even in the release builds, needs to be always correct.
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index e9e17b86f..c732169c7 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -835,9 +835,25 @@ void ForKitProcWSHandler::handleMessage(const std::vector<char> &data)
     const std::string firstLine = LOOLProtocol::getFirstLine(&data[0], data.size());
     const StringVector tokens = LOOLProtocol::tokenize(firstLine.data(), firstLine.size());
 
-    // Just add here the processing of specific received messages 
-
-    LOG_ERR("ForKitProcWSHandler: unknown command: " << tokens[0]);
+    if (tokens.equals(0, "segfaultcount"))
+    {
+        int count = std::stoi(tokens[1]);
+        if (count >= 0)
+        {
+#if !MOBILEAPP
+            Admin::instance().addSegFaultCount(count);
+#endif
+            LOG_INF(count << " loolkit processes crashed with segmentation fault.");
+        }
+        else
+        {
+            LOG_WRN("Invalid 'segfaultcount' message received.");
+        }
+    }
+    else
+    {
+        LOG_ERR("ForKitProcWSHandler: unknown command: " << tokens[0]);
+    }
 }
 
 LOOLWSD::LOOLWSD()


More information about the Libreoffice-commits mailing list