[PATCH] client: Allow send error recovery without an abort

Wed Jun 6 01:14:54 UTC 2018

Introduce a new call wl_display_set_error_handler_client(), which allows
a client to register a callback function which is invoked if there is an
error (possibly transient) while sending messages to the server.

This allows a Wayland client that is actually a nested server to try and
recover more gracefully from send errors, allowing it one to disconnect
and reconnect to the server if necessary.

The handler is called with the wl_display and the errno, and is expected
to return one of WL_SEND_ERROR_ABORT, WL_SEND_ERROR_FATAL, or
WL_SEND_ERROR_RETRY. The core-client code will then respectively abort()
the process (the default if no handler is set), set the display context
into an error state, or retry the send operation that failed.

The existing display test is extended to exercise the new function.

Signed-off-by: Lloyd Pique <lpique at google.com>
---
 COPYING                   |   1 +
 src/wayland-client-core.h |  75 +++++++++++++++++
 src/wayland-client.c      |  67 +++++++++++++++-
 tests/display-test.c      | 165 +++++++++++++++++++++++++++++++++++++-
 4 files changed, 306 insertions(+), 2 deletions(-)

diff --git a/COPYING b/COPYING
index eb25a4e..bace5d7 100644
--- a/COPYING
+++ b/COPYING
@@ -2,6 +2,7 @@ Copyright © 2008-2012 Kristian Høgsberg
 Copyright © 2010-2012 Intel Corporation
 Copyright © 2011 Benjamin Franzke
 Copyright © 2012 Collabora, Ltd.
+Copyright © 2018 Google LLC
 
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the "Software"),
diff --git a/src/wayland-client-core.h b/src/wayland-client-core.h
index 03e781b..a3e4b8e 100644
--- a/src/wayland-client-core.h
+++ b/src/wayland-client-core.h
@@ -257,6 +257,81 @@ wl_display_cancel_read(struct wl_display *display);
 int
 wl_display_read_events(struct wl_display *display);
 
+/** \enum wl_send_error_strategy
+ *
+ * This enum are the valid values that can be returned from a
+ * wl_send_error_handler_func_t
+ *
+ * \sa wl_send_error_handler_func_t
+ */
+enum wl_send_error_strategy {
+	/** Abort the client */
+	WL_SEND_ERROR_ABORT,
+	/** Put the display into a fatal error state */
+	WL_SEND_ERROR_FATAL,
+	/** Retry the send operation */
+	WL_SEND_ERROR_RETRY,
+};
+
+/**
+ * \brief A function pointer type for customizing client send error handling
+ *
+ * A client send error strategy handler is a callback function which the client
+ * can set to direct the core client library how to respond if an error is
+ * encountered while sending a message.
+ *
+ * Normally messages are enqueued to an output buffer by the core client
+ * library, and the user of the core client library is expected to call
+ * wl_display_flush() occasionally, but as that is non-blocking, messages can
+ * queue up in the output buffer.  If later calls to send messages happen to
+ * fill up the output buffer, the core client library will make an internal
+ * call to flush the output buffer.  But if the write call unexpectedly fails,
+ * as there is no good way to return an error to the caller, the core client
+ * library will log a message and call abort().
+ *
+ * If instead a send error strategy function is set, the core client library
+ * will call it to allow the client to choose a different strategy.
+ *
+ * The function takes two arguments.  The first is the display context for the
+ * error.  The second is the errno for the failure that occurred.
+ *
+ * The handler is expected to return one of the wl_send_error_strategy values
+ * to indicate how the core client library should proceed.
+ *
+ * If the handler returns WL_CLIENT_ERROR_ABORT, the core client code will
+ * perform the default action -- log an error and then call the C runtime
+ * abort() function.
+ *
+ * If the handler returns WL_CLIENT_ERROR_FATAL, the core client code will
+ * continue, but the display context will be put into a fatal error state, and
+ * the display should no longer be used. Further calls to send messages
+ * will function, but the message will not actually be enqueued.
+ *
+ * If the handler returns WL_CLIENT_ERROR_RETRY, the send operation is retried
+ * immediately on return, and may fail again.  If it does, the handler will be
+ * called again with the new errno. To avoid a busy loop, the handler should
+ * wait on the display connection, or something equivalent.  If after returning
+ * WL_CLIENT_ERROR_RETRY the send operation succeeds, the handler is called one
+ * last time with an errno of zero. This allows the handler to perform any
+ * necessary cleanup now that the send operation has succeeded. For this final
+ * call, the return value is ignored.
+ *
+ * Note that if the handler blocks, the current thread that is making the call
+ * to send a message will of course be blocked. This means the default behavior
+ * of Wayland client calls being non-blocking will no longer be true.
+ *
+ * In a multi threaded environment, this error handler could be called once for
+ * every thread that attempts to send a message on the display and encounters
+ * the error with the connection. It is the responsibility of the handler
+ * function be itself thread safe.
+ */
+typedef enum wl_send_error_strategy
+	(*wl_send_error_handler_func_t)(struct wl_display *, int /* errno */);
+
+void
+wl_display_set_send_error_handler_client(struct wl_display *display,
+				         wl_send_error_handler_func_t handler);
+
 void
 wl_log_set_handler_client(wl_log_func_t handler);
 
diff --git a/src/wayland-client.c b/src/wayland-client.c
index efeb745..3a9db6f 100644
--- a/src/wayland-client.c
+++ b/src/wayland-client.c
@@ -113,6 +113,8 @@ struct wl_display {
 	int reader_count;
 	uint32_t read_serial;
 	pthread_cond_t reader_cond;
+
+	wl_send_error_handler_func_t send_error_handler;
 };
 
 /** \endcond */
@@ -696,6 +698,44 @@ wl_proxy_marshal_array_constructor(struct wl_proxy *proxy,
 							    proxy->version);
 }
 
+static enum wl_send_error_strategy
+display_send_error_handler_invoke(struct wl_display *display, int send_errno)
+{
+	enum wl_send_error_strategy ret = WL_SEND_ERROR_ABORT;
+	wl_send_error_handler_func_t handler = display->send_error_handler;
+	if (handler) {
+		pthread_mutex_unlock(&display->mutex);
+		ret = handler(display, send_errno);
+		pthread_mutex_lock(&display->mutex);
+	}
+	return ret;
+}
+
+static void
+display_closure_send_error(struct wl_display *display,
+	                   struct wl_closure *closure)
+{
+	while (true) {
+		int send_errno = errno;
+		switch (display_send_error_handler_invoke(display, send_errno)) {
+		case WL_SEND_ERROR_FATAL:
+			display_fatal_error(display, send_errno);
+			return;
+
+		case WL_SEND_ERROR_RETRY:
+			if (!wl_closure_send(closure, display->connection)) {
+				display_send_error_handler_invoke(display, 0);
+				return;
+			}
+			break;
+
+		case WL_SEND_ERROR_ABORT:
+		default:
+			wl_abort("Error sending request: %s\n",
+				 strerror(send_errno));
+		}
+	}
+}
 
 /** Prepare a request to be sent to the compositor
  *
@@ -743,6 +783,9 @@ wl_proxy_marshal_array_constructor_versioned(struct wl_proxy *proxy,
 			goto err_unlock;
 	}
 
+	if (proxy->display->last_error)
+		goto err_unlock;
+
 	closure = wl_closure_marshal(&proxy->object, opcode, args, message);
 	if (closure == NULL)
 		wl_abort("Error marshalling request: %s\n", strerror(errno));
@@ -751,7 +794,7 @@ wl_proxy_marshal_array_constructor_versioned(struct wl_proxy *proxy,
 		wl_closure_print(closure, &proxy->object, true);
 
 	if (wl_closure_send(closure, proxy->display->connection))
-		wl_abort("Error sending request: %s\n", strerror(errno));
+		display_closure_send_error(proxy->display, closure);
 
 	wl_closure_destroy(closure);
 
@@ -2000,6 +2043,28 @@ wl_display_flush(struct wl_display *display)
 	return ret;
 }
 
+/** Sets the error handler to invoke when a send fails internally
+*
+* \param display The display context object
+* \param handler The function to call when an error occurs.
+*
+* Sets the error handler to call to decide what should happen on a send error.
+*
+* If no handler is set, the client will use the WL_SEND_ERROR_ABORT strategy.
+* This means the core client code will call abort() if if encounters a failure
+* while sending a message to the server.
+*
+* Setting a handler allows a client implementation to alter this behavior.
+*
+* \memberof wl_display
+*/
+WL_EXPORT void
+wl_display_set_send_error_handler_client(struct wl_display *display,
+					 wl_send_error_handler_func_t handler)
+{
+	display->send_error_handler = handler;
+}
+
 /** Set the user data associated with a proxy
  *
  * \param proxy The proxy object
diff --git a/tests/display-test.c b/tests/display-test.c
index 9b49a0e..8b2993d 100644
--- a/tests/display-test.c
+++ b/tests/display-test.c
@@ -1282,7 +1282,8 @@ double_zombie_client(void *data)
 	struct wl_registry *registry;
 
 	registry = wl_display_get_registry(c->wl_display);
-	wl_registry_add_listener(registry, &double_zombie_fd_registry_listener, NULL);
+	wl_registry_add_listener(registry, &double_zombie_fd_registry_listener,
+		                 NULL);
 
 	/* Gets the registry */
 	wl_display_roundtrip(c->wl_display);
@@ -1315,3 +1316,165 @@ TEST(zombie_fd_errant_consumption)
 
 	display_destroy(d);
 }
+
+static void
+fill_client_send_buffers(struct wl_display *display)
+{
+	int sync_count = 4096;
+	int socket_buf = 1024;
+
+        /* Use the minimum send buffer size. We want the client to be
+         * able to fill the buffer easily. */
+	assert(setsockopt(wl_display_get_fd(display), SOL_SOCKET, SO_SNDBUF,
+		         &socket_buf, sizeof(socket_buf)) == 0);
+
+	/* Fill up the client and server buffers With the default error
+	 * handling, this will abort the client. */
+	while(sync_count--) {
+		wl_callback_destroy(wl_display_sync(display));
+	}
+}
+
+static enum wl_send_error_strategy
+fatal_error_handler(struct wl_display *display, int send_errno)
+{
+	assert(send_errno == EAGAIN);
+	return WL_SEND_ERROR_FATAL;
+}
+
+static void
+noabort_client(void)
+{
+	struct client *c = client_connect();
+
+	/* On a send error, set a the display context into an error state
+	 * rather than calling abort(). */
+	wl_display_set_send_error_handler_client(c->wl_display,
+		                                 fatal_error_handler);
+
+	/* Try and bind a wl_set, which isn't used but has a side effect. */
+	wl_proxy_destroy((struct wl_proxy *)client_get_seat(c));
+
+	/* Verify we do not have any errors at this point */
+	assert(wl_display_get_error(c->wl_display) == 0);
+
+	fill_client_send_buffers(c->wl_display);
+
+	/* Verify that we see the error that was set during the send. */
+	assert(wl_display_get_error(c->wl_display) == EAGAIN);
+
+	client_disconnect_nocheck(c);
+}
+
+static void
+terminate_instead_of_bind(struct wl_client *client, void *data,
+			 uint32_t version, uint32_t id)
+{
+	wl_display_terminate(((struct display *)data)->wl_display);
+}
+
+TEST(noabort_client_tst)
+{
+	struct display *d = display_create();
+
+	test_set_timeout(2);
+
+	client_create_noarg(d, noabort_client);
+
+	/* The client must connect before simulating an unresponsive state. Use
+	 * the request for a set to terminate the event loop. */
+	wl_global_create(d->wl_display, &wl_seat_interface,
+			 1, d, terminate_instead_of_bind);
+
+	/* This handles the initial connect, then returns. */
+	display_run(d);
+
+	/* Not processing any events for a moment should allow the buffers
+	 * to be filled up by the client, and it to enter an error state then
+	 * disconnect. */
+	test_sleep(1);
+
+	/* We need one last dispatch to handle the terminated client. */
+	wl_event_loop_dispatch(wl_display_get_event_loop(d->wl_display), 0);
+
+	display_destroy(d);
+}
+
+static enum wl_send_error_strategy
+retry_with_poll_handler(struct wl_display *display, int send_errno)
+{
+	struct pollfd pfd;
+
+	*(int *)wl_display_get_user_data(display) = send_errno;
+
+	if (send_errno != 0) {
+		assert(send_errno == EAGAIN);
+
+		pfd.fd = wl_display_get_fd(display);
+		pfd.events = POLLOUT;
+		assert(poll(&pfd, 1, -1) == 1);
+	}
+
+	return WL_SEND_ERROR_RETRY;
+}
+
+static void
+retry_with_poll_client(void)
+{
+	int last_handler_errno = -1;
+	struct client *c = client_connect();
+
+	/* On a send error, retry the operation with a poll using the
+	 * specified handler. */
+	wl_display_set_send_error_handler_client(c->wl_display,
+		                                 retry_with_poll_handler);
+
+	/* Try and bind a wl_set, which isn't used but has a side effect. */
+	wl_proxy_destroy((struct wl_proxy *)client_get_seat(c));
+
+	wl_display_set_user_data(c->wl_display, &last_handler_errno);
+
+	/* Verify we do not have any errors at this point */
+	assert(wl_display_get_error(c->wl_display) == 0);
+
+	/* Fill up the client and server buffers With the default error
+	 * handling, this will abort the client. */
+	fill_client_send_buffers(c->wl_display);
+
+	/* Verify we do not have any errors at this point */
+	assert(wl_display_get_error(c->wl_display) == 0);
+
+	/* The error handler stores the last errno handled. When the handler
+	 * returns WL_SEND_ERROR_RETRY, and the send retries successfully, the
+	 * handler is invoked again with an errno of zero, so that the handler
+	 * can handler can reset its state if desired. */
+	assert(last_handler_errno == 0);
+
+	client_disconnect_nocheck(c);
+}
+
+TEST(retry_client_tst)
+{
+	struct display *d = display_create();
+
+	test_set_timeout(2);
+
+	client_create_noarg(d, retry_with_poll_client);
+
+	/* The client must connect before simulating an unresponsive state. Use
+	 * the request for a set to terminate the event loop. */
+	wl_global_create(d->wl_display, &wl_seat_interface,
+			 1, d, terminate_instead_of_bind);
+
+	/* This handles the initial connect, then returns. */
+	display_run(d);
+
+	/* Not processing any events for a moment should allow the buffers
+	 * to be filled up by the client, and it should poll and retry. */
+	test_sleep(1);
+
+	/* Complete processing normally. */
+	display_run(d);
+
+	display_destroy(d);
+}
-- 
2.17.1.1185.g55be947832-goog