[RCF wayland] client: extend error handling

Tue Apr 15 06:36:54 PDT 2014

On Fri, 11 Apr 2014 11:39:13 +0200
Marek Chalupa <mchqwerty at gmail.com> wrote:

> When an error occures, than wl_display_get_error() do not
> provide any way of getting know if it was a local error or if it was
> an error event, respectively what object caused the error and what
> the error was.
> 
> This patch introduces a new function wl_display_get_protocol_error()
> which will return error code, interface and id of the object that
> generated the error.
> wl_display_get_error() will work the same way as before.
> 
> wl_display_get_protcol_error() DO NOT indicate that the error happed.
> It returns valid information only in that case that (protocol) error
> happend, so it should be used after calling wl_display_get_error()
> with positive result.
> 
> Thanks to Pekka Paalanen <ppaalanen at gmail.com> for pointing out
> issues in the first versions of this patch.
> ---
>  src/wayland-client.c | 149 +++++++++++++++++++++++++++++++++++++++++++++------
>  src/wayland-client.h |   3 ++
>  2 files changed, 136 insertions(+), 16 deletions(-)
> 
> diff --git a/src/wayland-client.c b/src/wayland-client.c
> index bd40313..7f21fcd 100644
> --- a/src/wayland-client.c
> +++ b/src/wayland-client.c
> @@ -78,7 +78,25 @@ struct wl_event_queue {
>  struct wl_display {
>  	struct wl_proxy proxy;
>  	struct wl_connection *connection;
> +
> +	/* errno of the last wl_display error or 1 when the error
> +	 * came via wire as an event */

1 is EPERM, we might just pick an errno that fits best, and call it
that.

But does the code really do this?

I suppose we are still free to pick the errno, since the implementation
so far has been buggy, interpreting all protocol error codes in the
wl_display context and picking the errno based on that, instead
accounting for the actual interface it came with.

>  	int last_error;
> +
> +	/* When display gets an error event from some object, it stores
> +	 * its credentials here, so that client can get detailed information

Would s/credentials/signature/ be more appropriate?

> +	 * about the error afterwards */
> +	struct {
> +		/* Code of the error. It can be compared to
> +		 * the interface's errors enumeration. */
> +		int code;
> +		/* interface (protocol) in wich the error occured */
> +		const struct wl_interface *interface;
> +		/* id of the proxy that caused the error. There's no warranty
> +		 * that the proxy is still valid. It's up to client how it will
> +		 * use it */
> +		unsigned int id;
> +	} protocol_error;

Yup, that is probably all the info we can give or might need.

>  	int fd;
>  	pthread_t display_thread;
>  	struct wl_map objects;
> @@ -96,6 +114,14 @@ struct wl_display {
>  
>  static int debug_client = 0;
>  
> +/**
> + * This function is called for local errors (no memory, server hung up)
> + *
> + * \param display
> + * \param error    error value (EINVAL, EFAULT, ...)
> + *
> + * \note this function is called with display mutex locked
> + */
>  static void
>  display_fatal_error(struct wl_display *display, int error)
>  {
> @@ -105,7 +131,7 @@ display_fatal_error(struct wl_display *display, int error)
>  		return;
>  
>  	if (!error)
> -		error = 1;
> +		error = EFAULT;
>  
>  	display->last_error = error;
>  
> @@ -113,11 +139,49 @@ display_fatal_error(struct wl_display *display, int error)
>  		pthread_cond_broadcast(&iter->cond);
>  }
>  
> +/**
> + * This function is called for error events
> + * and idicates that in some object occured an error.
> + * Difference between this function and display_fatal_error()
> + * is that this one handles errors that will come in wire, whereas
> + * display_fatal_error() is called for local errors.
> + *
> + * \param display
> + * \param code    error code
> + * \param id      id of the object that generated the error
> + * \param intf    protocol interface
> + * \param err     errno for compatibility of wl_display_get_error() or 1
> + */
>  static void
> -wl_display_fatal_error(struct wl_display *display, int error)
> +display_protocol_error(struct wl_display *display, int code,
> +		       unsigned int id, const struct wl_interface *intf,
> +		       int err)
>  {
> +	struct wl_event_queue *iter;
> +
> +	/* error should be a number from an enumeration */
> +	assert(code >= 0);

'code' comes from the wire, right?
Then this assert should not be here, because even if the server sends
something bad, it's no reason to abort when we are going to shut down
anyway.

An error within an error is no reason to quit delivering the original
error if possible. ;-)

> +	/* it's either 1 or ESOMETHING */
> +	assert(err >= 0);
> +
> +	if (display->last_error)
> +		return;
> +
>  	pthread_mutex_lock(&display->mutex);
> -	display_fatal_error(display, error);
> +
> +	 /* Since wl_display_get_error() returns errno,
> +	  * we must set it here too for the sake of
> +	  * compatibility (and, of course, for prevention of
> +	  * multiple broadcasting iter->cond) */
> +	display->last_error = err;
> +
> +	display->protocol_error.code = code;
> +	display->protocol_error.id = id;
> +	display->protocol_error.interface = intf;
> +
> +	wl_list_for_each(iter, &display->event_queue_list, link)
> +		pthread_cond_broadcast(&iter->cond);
> +
>  	pthread_mutex_unlock(&display->mutex);
>  }
>  
> @@ -579,25 +643,32 @@ display_handle_error(void *data,
>  		     uint32_t code, const char *message)
>  {
>  	struct wl_proxy *proxy = object;
> -	int err;
> +	int err = 1;
>  
>  	wl_log("%s@%u: error %d: %s\n",
>  	       proxy->object.interface->name, proxy->object.id, code, message);
>  
> -	switch (code) {
> -	case WL_DISPLAY_ERROR_INVALID_OBJECT:
> -	case WL_DISPLAY_ERROR_INVALID_METHOD:
> -		err = EINVAL;
> -		break;
> -	case WL_DISPLAY_ERROR_NO_MEMORY:
> -		err = ENOMEM;
> -		break;
> -	default:
> -		err = EFAULT;
> -		break;
> +	/* due to compatibility, we must pass an errno to display_protocol_error()
> +	 * (for setting display->last_error). I can imagine that later this code
> +	 * is deleted and protocol errors will have display->last_error set to 1
> +	 */
> +	if (wl_interface_equal(proxy->object.interface, &wl_display_interface)) {
> +		switch (code) {
> +			case WL_DISPLAY_ERROR_INVALID_OBJECT:
> +			case WL_DISPLAY_ERROR_INVALID_METHOD:
> +			        err = EINVAL;
> +			        break;
> +			case WL_DISPLAY_ERROR_NO_MEMORY:
> +			        err = ENOMEM;
> +			        break;
> +			default:
> +			        err = EFAULT;
> +			        break;
> +		}

This is what referred to at the top, we still set the errno as before
instead of 1, right?

I think we need to keep this, so we don't change the
behaviour in the valid use case of wl_display_get_error().

>  	}
>  
> -	wl_display_fatal_error(display, err);
> +	display_protocol_error(display, code, proxy->object.id,
> +			       proxy->object.interface, err);
>  }
>  
>  static void
> @@ -1489,6 +1560,52 @@ wl_display_get_error(struct wl_display *display)
>  	return ret;
>  }
>  
> +/**
> + * Retrieve the information about an protocol error
> + *
> + * If no protocol error occured, then the interface returned is NULL and id is 0.
> + * First you sould call wl_display_get_error() to find out if an error occured
> + * and then you can check if it was local error or protocol error.

Usually we find out about errors by a libwayland-client function
returning failure, and then wl_display_get_error() just provides
additional information, contrary to something like glGetError().

> + *
> + * \param display    display proxy

It's not a wl_proxy but the real thing.

> + * \param interface  if not NULL, store there and interface on which the error
> + *                   occured
> + * \param id         if not NULL, store there the id of the object that generated
> + *                   the error. There's no warranty that the object is still valid.
> + *                   Client must know if he deleted the object or not.
> + * \return           Code of the error
> + *
> + * \code
> + * int err = wl_display_get_error(display);
> + *
> + * if (err) {
> + *        code = wl_display_get_protocol_error(display, &interface, &id);
> + *        handle_error(code, interface, id);
> + * }
> + *  \endcode
> + */
> +WL_EXPORT int
> +wl_display_get_protocol_error(struct wl_display *display,
> +			      const struct wl_interface **interface,
> +			      unsigned int *id)
> +{
> +	int ret;
> +
> +	pthread_mutex_lock(&display->mutex);
> +
> +	ret = display->protocol_error.code;
> +
> +	if (interface)
> +		*interface = display->protocol_error.interface;
> +	if (id)
> +		*id = display->protocol_error.id;
> +
> +	pthread_mutex_unlock(&display->mutex);
> +
> +	return ret;
> +}
> +
> +
>  /** Send all buffered requests on the display to the server
>   *
>   * \param display The display context object
> diff --git a/src/wayland-client.h b/src/wayland-client.h
> index 2a32785..61aec0c 100644
> --- a/src/wayland-client.h
> +++ b/src/wayland-client.h
> @@ -161,6 +161,9 @@ int wl_display_dispatch_queue_pending(struct wl_display *display,
>  				      struct wl_event_queue *queue);
>  int wl_display_dispatch_pending(struct wl_display *display);
>  int wl_display_get_error(struct wl_display *display);
> +int wl_display_get_protocol_error(struct wl_display *display,
> +				  const struct wl_interface **interface,
> +				  unsigned int *id);
>  
>  int wl_display_flush(struct wl_display *display);
>  int wl_display_roundtrip(struct wl_display *display);

Right, I like this idea, not only because it was something I wished
for. :-)

I read through this patch, but didn't have time to do an in-depth
review of the correctness yet.

Thanks,
pq