[PATCH weston 2/3] shell: fix race on desktop-shell exit

Pekka Paalanen ppaalanen at gmail.com
Wed Aug 27 03:41:44 PDT 2014


From: Pekka Paalanen <pekka.paalanen at collabora.co.uk>

The desktop shell plugin registers both a wl_client destroy signal
listener, and a sigchld handler, when launching weston-desktop-shell.
However, nothing guarantees in which order do the wl_client destructor
and the sigchld handler run.

Luckily, the sigchld handler cannot interrupt any code, because we
handle the signal via signalfd, which means it is handled like any event
in the compositor's main event loop.

Still, shell.c has a race, that when lost, can cause a crash, as
described in bug #82957.

If the sigchld handler happens to run first, it will try to launch a new
weston-desktop-shell without removing the destroy listener from the old
wl_client first. This leads to list corruption, that may cause a crash
when the old wl_client gets destroyed.

Simply removing the destroy listener in the sigchld handler is not
enough, because respawning sets shell->child.client pointer, and if
the wl_client destructor runs after, it will reset it to NULL.

OTOH, the wl_client destroy handler cannot reset shell->child.process,
because that would cause the sigchld handler in weston core to not find
the process tracker anymore, and report that an unknown process exited.

Turns out, that to make everything work, we would need to wait for both
the wl_client destructor and the sigchld handler to have run, before
respawn. This gets tricky.

Instead, solve the problem by removing shell->child.process. Use the new
weston_client_start() which automatically creates and manages the struct
weston_process. The shell does not need to know about the process exit,
it only needs to know about the client disconnect. Weston-desktop-shell
will never attempt to reconnect, and it would not work even if it did,
so disconnect is equivalent to weston-desktop-shell exiting.

This should permanently solve the race for weston-desktop-shell.

Bug: https://bugs.freedesktop.org/show_bug.cgi?id=82957
Cc: Boyan Ding <stu_dby at 126.com>
Cc: Derek Foreman <derekf at osg.samsung.com>
Signed-off-by: Pekka Paalanen <pekka.paalanen at collabora.co.uk>
---
 desktop-shell/shell.c | 35 +++++++++++++++++++++--------------
 desktop-shell/shell.h |  1 -
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/desktop-shell/shell.c b/desktop-shell/shell.c
index 99f3343..f82c47a 100644
--- a/desktop-shell/shell.c
+++ b/desktop-shell/shell.c
@@ -5294,14 +5294,9 @@ shell_surface_configure(struct weston_surface *es, int32_t sx, int32_t sy)
 static void launch_desktop_shell_process(void *data);
 
 static void
-desktop_shell_sigchld(struct weston_process *process, int status)
+respawn_desktop_shell_process(struct desktop_shell *shell)
 {
 	uint32_t time;
-	struct desktop_shell *shell =
-		container_of(process, struct desktop_shell, child.process);
-
-	shell->child.process.pid = 0;
-	shell->child.client = NULL; /* already destroyed by wayland */
 
 	/* if desktop-shell dies more than 5 times in 30 seconds, give up */
 	time = weston_compositor_get_time();
@@ -5312,13 +5307,12 @@ desktop_shell_sigchld(struct weston_process *process, int status)
 
 	shell->child.deathcount++;
 	if (shell->child.deathcount > 5) {
-		weston_log("%s died, giving up.\n", shell->client);
+		weston_log("%s disconnected, giving up.\n", shell->client);
 		return;
 	}
 
-	weston_log("%s died, respawning...\n", shell->client);
+	weston_log("%s disconnected, respawning...\n", shell->client);
 	launch_desktop_shell_process(shell);
-	shell_fade_startup(shell);
 }
 
 static void
@@ -5329,7 +5323,18 @@ desktop_shell_client_destroy(struct wl_listener *listener, void *data)
 	shell = container_of(listener, struct desktop_shell,
 			     child.client_destroy_listener);
 
+	wl_list_remove(&shell->child.client_destroy_listener.link);
 	shell->child.client = NULL;
+	/*
+	 * unbind_desktop_shell() will reset shell->child.desktop_shell
+	 * before the respawned process has a chance to create a new
+	 * desktop_shell object, because we are being called from the
+	 * wl_client destructor which destroys all wl_resources before
+	 * returning.
+	 */
+
+	respawn_desktop_shell_process(shell);
+	shell_fade_startup(shell);
 }
 
 static void
@@ -5337,10 +5342,8 @@ launch_desktop_shell_process(void *data)
 {
 	struct desktop_shell *shell = data;
 
-	shell->child.client = weston_client_launch(shell->compositor,
-						 &shell->child.process,
-						 shell->client,
-						 desktop_shell_sigchld);
+	shell->child.client = weston_client_start(shell->compositor,
+						  shell->client);
 
 	if (!shell->child.client)
 		weston_log("not able to start %s\n", shell->client);
@@ -6152,8 +6155,12 @@ shell_destroy(struct wl_listener *listener, void *data)
 
 	/* Force state to unlocked so we don't try to fade */
 	shell->locked = false;
-	if (shell->child.client)
+
+	if (shell->child.client) {
+		/* disable respawn */
+		wl_list_remove(&shell->child.client_destroy_listener.link);
 		wl_client_destroy(shell->child.client);
+	}
 
 	wl_list_remove(&shell->idle_listener.link);
 	wl_list_remove(&shell->wake_listener.link);
diff --git a/desktop-shell/shell.h b/desktop-shell/shell.h
index c6ea328..67c5f50 100644
--- a/desktop-shell/shell.h
+++ b/desktop-shell/shell.h
@@ -134,7 +134,6 @@ struct desktop_shell {
 	struct weston_surface *grab_surface;
 
 	struct {
-		struct weston_process process;
 		struct wl_client *client;
 		struct wl_resource *desktop_shell;
 		struct wl_listener client_destroy_listener;
-- 
1.8.5.5



More information about the wayland-devel mailing list