[Libreoffice-commits] online.git: common/SigUtil.cpp kit/ForKit.cpp wsd/Admin.cpp wsd/Admin.hpp wsd/AdminModel.cpp wsd/AdminModel.hpp wsd/LOOLWSD.cpp
Gabriel Masei (via logerrit)
logerrit at kemper.freedesktop.org
Wed Apr 8 13:06:57 UTC 2020
common/SigUtil.cpp | 2 +-
kit/ForKit.cpp | 36 ++++++++++++++++++++++++++++++++++--
wsd/Admin.cpp | 5 +++++
wsd/Admin.hpp | 1 +
wsd/AdminModel.cpp | 6 ++++++
wsd/AdminModel.hpp | 4 ++++
wsd/LOOLWSD.cpp | 22 +++++++++++++++++++---
7 files changed, 70 insertions(+), 6 deletions(-)
New commits:
commit 0ac330f599b8daaeae5ba057fddb6994fd078e73
Author: Gabriel Masei <gabriel.masei at 1and1.ro>
AuthorDate: Wed Apr 8 00:21:17 2020 +0300
Commit: Michael Meeks <michael.meeks at collabora.com>
CommitDate: Wed Apr 8 15:06:39 2020 +0200
Add segmentation fault metric for Kit processes
Change-Id: Ifb0de004274213ef512f601e4419f98f456c7288
Reviewed-on: https://gerrit.libreoffice.org/c/online/+/91857
Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice at gmail.com>
Reviewed-by: Michael Meeks <michael.meeks at collabora.com>
diff --git a/common/SigUtil.cpp b/common/SigUtil.cpp
index 944a8d376..cd4c779dc 100644
--- a/common/SigUtil.cpp
+++ b/common/SigUtil.cpp
@@ -260,7 +260,7 @@ namespace SigUtil
dumpBacktrace();
// let default handler process the signal
- kill(getpid(), signal);
+ ::raise(signal);
}
void dumpBacktrace()
diff --git a/kit/ForKit.cpp b/kit/ForKit.cpp
index 8b18ef34e..bea72f4d5 100644
--- a/kit/ForKit.cpp
+++ b/kit/ForKit.cpp
@@ -36,6 +36,9 @@
#include <Unit.hpp>
#include <Util.hpp>
#include <WebSocketHandler.hpp>
+#if !MOBILEAPP
+#include <Admin.hpp>
+#endif
#include <common/FileUtil.hpp>
#include <common/Seccomp.hpp>
@@ -226,7 +229,7 @@ static void cleanupChildren()
{
std::vector<std::string> jails;
Process::PID exitedChildPid;
- int status;
+ int status, segFaultCount = 0;
// Reap quickly without doing slow cleanup so WSD can spawn more rapidly.
while ((exitedChildPid = waitpid(-1, &status, WUNTRACED | WNOHANG)) > 0)
@@ -242,13 +245,42 @@ static void cleanupChildren()
// We ran out of kits and we aren't terminating.
LOG_WRN("No live Kits exist, and we are not terminating yet.");
}
+
+ if (WIFSIGNALED(status) && (WTERMSIG(status) == SIGSEGV || WTERMSIG(status) == SIGBUS))
+ {
+ segFaultCount ++;
+ }
}
else
{
LOG_ERR("Unknown child " << exitedChildPid << " has exited");
}
}
-
+
+ if (segFaultCount)
+ {
+#ifdef KIT_IN_PROCESS
+#if !MOBILEAPP
+ Admin::instance().addSegFaultCount(segFaultCount);
+#endif
+#else
+ if (WSHandler)
+ {
+ std::stringstream stream;
+ stream << "segfaultcount " << segFaultCount << "\n";
+ int ret = WSHandler->sendMessage(stream.str());
+ if (ret == -1)
+ {
+ LOG_WRN("Could not send 'segfaultcount' message through websocket");
+ }
+ else
+ {
+ LOG_WRN("Successfully sent 'segfaultcount' message " << stream.str());
+ }
+ }
+#endif
+ }
+
// Now delete the jails.
for (const auto& path : jails)
{
diff --git a/wsd/Admin.cpp b/wsd/Admin.cpp
index 88b9268df..3fc0b6955 100644
--- a/wsd/Admin.cpp
+++ b/wsd/Admin.cpp
@@ -593,6 +593,11 @@ void Admin::setDocWopiUploadDuration(const std::string& docKey, const std::chron
addCallback([=]{ _model.setDocWopiUploadDuration(docKey, uploadDuration); });
}
+void Admin::addSegFaultCount(unsigned segFaultCount)
+{
+ addCallback([=]{ _model.addSegFaultCount(segFaultCount); });
+}
+
void Admin::notifyForkit()
{
std::ostringstream oss;
diff --git a/wsd/Admin.hpp b/wsd/Admin.hpp
index e39d89fe7..6287d38bc 100644
--- a/wsd/Admin.hpp
+++ b/wsd/Admin.hpp
@@ -129,6 +129,7 @@ public:
void setViewLoadDuration(const std::string& docKey, const std::string& sessionId, std::chrono::milliseconds viewLoadDuration);
void setDocWopiDownloadDuration(const std::string& docKey, std::chrono::milliseconds wopiDownloadDuration);
void setDocWopiUploadDuration(const std::string& docKey, const std::chrono::milliseconds uploadDuration);
+ void addSegFaultCount(unsigned segFaultCount);
void getMetrics(std::ostringstream &metrics);
diff --git a/wsd/AdminModel.cpp b/wsd/AdminModel.cpp
index 22e402a83..f19d6e367 100644
--- a/wsd/AdminModel.cpp
+++ b/wsd/AdminModel.cpp
@@ -771,6 +771,11 @@ void AdminModel::setDocWopiUploadDuration(const std::string& docKey, const std::
it->second.setWopiUploadDuration(wopiUploadDuration);
}
+void AdminModel::addSegFaultCount(unsigned segFaultCount)
+{
+ _segFaultCount += segFaultCount;
+}
+
int filterNumberName(const struct dirent *dir)
{
return !fnmatch("[0-9]*", dir->d_name, 0);
@@ -993,6 +998,7 @@ void AdminModel::getMetrics(std::ostringstream &oss)
oss << "kit_count " << kitStats.unassignedCount + kitStats.assignedCount << std::endl;
oss << "kit_unassigned_count " << kitStats.unassignedCount << std::endl;
oss << "kit_assigned_count " << kitStats.assignedCount << std::endl;
+ oss << "kit_segfault_count " << _segFaultCount << std::endl;
PrintKitAggregateMetrics(oss, "thread_count", "", kitStats._threadCount);
PrintKitAggregateMetrics(oss, "memory_used", "bytes", docStats._kitUsedMemory._all);
PrintKitAggregateMetrics(oss, "cpu_time", "seconds", kitStats._cpuTime);
diff --git a/wsd/AdminModel.hpp b/wsd/AdminModel.hpp
index be5d9dd0f..b097289ef 100644
--- a/wsd/AdminModel.hpp
+++ b/wsd/AdminModel.hpp
@@ -249,6 +249,7 @@ class AdminModel
public:
AdminModel() :
+ _segFaultCount(0),
_owner(std::this_thread::get_id())
{
LOG_INF("AdminModel ctor.");
@@ -316,6 +317,7 @@ public:
void setViewLoadDuration(const std::string& docKey, const std::string& sessionId, std::chrono::milliseconds viewLoadDuration);
void setDocWopiDownloadDuration(const std::string& docKey, std::chrono::milliseconds wopiDownloadDuration);
void setDocWopiUploadDuration(const std::string& docKey, const std::chrono::milliseconds wopiUploadDuration);
+ void addSegFaultCount(unsigned segFaultCount);
void setForKitPid(pid_t pid) { _forKitPid = pid; }
void getMetrics(std::ostringstream &oss);
@@ -360,6 +362,8 @@ private:
uint64_t _sentBytesTotal;
uint64_t _recvBytesTotal;
+ uint64_t _segFaultCount;
+
pid_t _forKitPid;
/// We check the owner even in the release builds, needs to be always correct.
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index e9e17b86f..c732169c7 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -835,9 +835,25 @@ void ForKitProcWSHandler::handleMessage(const std::vector<char> &data)
const std::string firstLine = LOOLProtocol::getFirstLine(&data[0], data.size());
const StringVector tokens = LOOLProtocol::tokenize(firstLine.data(), firstLine.size());
- // Just add here the processing of specific received messages
-
- LOG_ERR("ForKitProcWSHandler: unknown command: " << tokens[0]);
+ if (tokens.equals(0, "segfaultcount"))
+ {
+ int count = std::stoi(tokens[1]);
+ if (count >= 0)
+ {
+#if !MOBILEAPP
+ Admin::instance().addSegFaultCount(count);
+#endif
+ LOG_INF(count << " loolkit processes crashed with segmentation fault.");
+ }
+ else
+ {
+ LOG_WRN("Invalid 'segfaultcount' message received.");
+ }
+ }
+ else
+ {
+ LOG_ERR("ForKitProcWSHandler: unknown command: " << tokens[0]);
+ }
}
LOOLWSD::LOOLWSD()
More information about the Libreoffice-commits mailing list