[PATCH weston v3 2/2] compositor-drm: Watchdog timer implementation

Pekka Paalanen ppaalanen at gmail.com
Tue Nov 18 06:47:45 PST 2014


On Fri, 31 Oct 2014 06:25:08 -0400
Frederic Plourde <frederic.plourde at collabora.co.uk> wrote:

> Weston will not repaint until previous update has been acked by
> a page-flip event coming from the drm driver. However, some buggy drivers
> won't return those events or will stop sending them at some point and
> Weston output repaints will completely freeze. To ease developers' task in
> testing their drivers, this patch makes compositor-drm use a watchdog timer
> to detect cases where those page-flip events stop coming.
> 
> This watchdog timer (WDT) implementation is software only and includes
> basic features usually found in a WDT. We simply exit Weston gracefully
> with an exit code when the dog barks.
> 
> The watchdog timeout value can be set via weston.ini by adding a
> watchdog-timer-timeout=<SECONDS> entry under a new [compositor-drm]
> section. Setting this timeout to 0 disables the watchdog feature.

You say seconds, but it's actually milliseconds, no?

IMHO, wouldn't even need to call it a watchdog at all, there is nothing
that should be generic here. It's a pageflip completion timeout.

> https://bugs.freedesktop.org/show_bug.cgi?id=83884
> Signed-off-by: Frederic Plourde <frederic.plourde at collabora.co.uk>
> ---
>  src/compositor-drm.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++

Update the weston-drm man page to document the new weston.ini key,
please.

>  1 file changed, 105 insertions(+)
> 
> diff --git a/src/compositor-drm.c b/src/compositor-drm.c
> index 07b83a7..4853328 100644
> --- a/src/compositor-drm.c
> +++ b/src/compositor-drm.c
> @@ -115,20 +115,22 @@ struct drm_compositor {
>  	int cursors_are_broken;
>  
>  	int use_pixman;
>  
>  	uint32_t prev_state;
>  
>  	struct udev_input input;
>  
>  	uint32_t cursor_width;
>  	uint32_t cursor_height;
> +
> +	uint32_t watchdog_timer_timeout;
>  };
>  
>  struct drm_mode {
>  	struct weston_mode base;
>  	drmModeModeInfo mode_info;
>  };
>  
>  struct drm_output;
>  
>  struct drm_fb {
> @@ -176,20 +178,25 @@ struct drm_output {
>  	struct drm_fb *current, *next;
>  	struct backlight *backlight;
>  
>  	struct drm_fb *dumb[2];
>  	pixman_image_t *image[2];
>  	int current_image;
>  	pixman_region32_t previous_damage;
>  
>  	struct vaapi_recorder *recorder;
>  	struct wl_listener recorder_frame_listener;
> +
> +	struct watchdog_timer {
> +		struct wl_event_source *timer;
> +		uint32_t timeout;

I don't think we need a per-output copy of the timeout length.

> +	} wdt;
>  };
>  
>  /*
>   * An output has a primary display plane plus zero or more sprites for
>   * blending display contents.
>   */
>  struct drm_sprite {
>  	struct wl_list link;
>  
>  	struct weston_plane plane;
> @@ -215,20 +222,90 @@ struct drm_parameters {
>  	int tty;
>  	int use_pixman;
>  	const char *seat_id;
>  };
>  
>  static struct gl_renderer_interface *gl_renderer;
>  
>  static const char default_seat[] = "seat0";
>  
>  static void
> +drm_output_watchdog_timer_bark(void *output) {
> +	/*
> +	* Our watchdog just barked at us, means we're not receiving drm
> +	* page flip events anymore for that output. Let's gracefully exit
> +	* weston with a return value so devs can debug what's going on.
> +	*/
> +	struct drm_output *out = (struct drm_output *) output;

Usually the void* is called "data", and the real typed variable would
be "output" then. No need to cast from a void*.

> +	struct weston_compositor *compositor =
> +		(struct weston_compositor *) out->base.compositor;

Mm, I don't think this needs a cast either.

> +
> +	weston_compositor_exit_with_code(compositor, EXIT_FAILURE);
> +}
> +
> +/* Creates the watchdog timer. Note that timer is not armed by default */
> +static int
> +drm_output_watchdog_timer_create(struct drm_output *output)
> +{
> +	struct wl_event_loop *loop = NULL;
> +	struct drm_compositor *ec =
> +		(struct drm_compositor *) output->base.compositor;
> +
> +	loop = wl_display_get_event_loop(ec->base.wl_display);
> +	if (loop) {

Not getting the event loop would be all kinds of fatal, so I'm not sure
there is a need to check it even. This would probably be the first
place that checks it.

> +		output->wdt.timer = wl_event_loop_add_timer(loop,
> +				  (wl_event_loop_timer_func_t)drm_output_watchdog_timer_bark,

Why do you cast the function pointer? Doing so can only hide mistakes,
while it has no benefits.

Btw. "drm_output_watchdog_timer_bark" is kind of generic.
"pageflip_timeout" would say exactly what it is for, IMHO.

> +				  output);
> +
> +		if (output->wdt.timer) {
> +			output->wdt.timeout = ec->watchdog_timer_timeout;
> +			return 0;
> +		}
> +	}
> +
> +	weston_log("creating drm watchdog timer failed: %m\n");
> +	return -1;
> +}
> +
> +/* Start/reset/arm the dog's timer. This can also be used to 'pat' the dog */
> +static int
> +drm_output_watchdog_timer_start(struct watchdog_timer *wdt)

We have a function naming convention that says that the argument to
this function should be struct drm_output, because the function name
starts with drm_output. It's our "member function of a class" hint.

> +{
> +	if (wdt->timer)
> +		return wl_event_source_timer_update(wdt->timer, wdt->timeout);

Here is a duplicate check, all the call sites already check it before
calling. Which actually means you will never see the error message
below...

Maybe just inline this into the call sites. No need to have this
function defined at all, I think.

> +
> +	weston_log("failed to start drm watchdog: %m\n");

If this gets called every frame, it's going to flood the logs if we
don't have a timer. It's also redundant with the message from
drm_output_watchdog_timer_create().

> +	return -1;
> +}
> +
> +/* Disarms the watchdog */
> +static int
> +drm_output_watchdog_timer_stop(struct watchdog_timer *wdt)
> +{
> +	if (wdt->timer)
> +		return wl_event_source_timer_update(wdt->timer, 0);
> +
> +	weston_log("failed to stop drm watchdog: %m\n");
> +	return -1;
> +}
> +
> +static void
> +drm_output_watchdog_timer_destroy(struct watchdog_timer *wdt)
> +{
> +	if (wdt->timer) {
> +		wl_event_source_remove(wdt->timer);
> +	}
> +
> +	wdt->timer = NULL;
> +}

These two functions get the same comments as the above one.

> +
> +static void
>  drm_output_set_cursor(struct drm_output *output);
>  
>  static int
>  drm_sprite_crtc_supported(struct weston_output *output_base, uint32_t supported)
>  {
>  	struct weston_compositor *ec = output_base->compositor;
>  	struct drm_compositor *c =(struct drm_compositor *) ec;
>  	struct drm_output *output = (struct drm_output *) output_base;
>  	int crtc;
>  
> @@ -617,20 +694,24 @@ drm_output_repaint(struct weston_output *output_base,
>  				     output->next->fb_id, 0, 0,
>  				     &output->connector_id, 1,
>  				     &mode->mode_info);
>  		if (ret) {
>  			weston_log("set mode failed: %m\n");
>  			goto err_pageflip;
>  		}
>  		output_base->set_dpms(output_base, WESTON_DPMS_ON);
>  	}
>  
> +	/* Start our watchdog timer */
> +	if (output->wdt.timer)
> +		drm_output_watchdog_timer_start(&output->wdt);
> +
>  	if (drmModePageFlip(compositor->drm.fd, output->crtc_id,
>  			    output->next->fb_id,
>  			    DRM_MODE_PAGE_FLIP_EVENT, output) < 0) {
>  		weston_log("queueing pageflip failed: %m\n");
>  		goto err_pageflip;
>  	}
>  
>  	output->page_flip_pending = 1;
>  
>  	drm_output_set_cursor(output);
> @@ -704,20 +785,24 @@ drm_output_start_repaint_loop(struct weston_output *output_base)
>  	if (output->destroy_pending)
>  		return;
>  
>  	if (!output->current) {
>  		/* We can't page flip if there's no mode set */
>  		goto finish_frame;
>  	}
>  
>  	fb_id = output->current->fb_id;
>  
> +	/* Start our watchdog timer */
> +	if (output->wdt.timer)
> +		drm_output_watchdog_timer_start(&output->wdt);
> +
>  	if (drmModePageFlip(compositor->drm.fd, output->crtc_id, fb_id,
>  			    DRM_MODE_PAGE_FLIP_EVENT, output) < 0) {
>  		weston_log("queueing pageflip failed: %m\n");
>  		goto finish_frame;
>  	}
>  
>  	return;
>  
>  finish_frame:
>  	/* if we cannot page-flip, immediately finish frame */
> @@ -744,20 +829,24 @@ vblank_handler(int fd, unsigned int frame, unsigned int sec, unsigned int usec,
>  	struct drm_output *output = s->output;
>  	struct timespec ts;
>  
>  	drm_output_update_msc(output, frame);
>  	output->vblank_pending = 0;
>  
>  	drm_output_release_fb(output, s->current);
>  	s->current = s->next;
>  	s->next = NULL;
>  
> +	/* Stop the watchdog instead of patting it here */
> +	if (output->wdt.timer)
> +		drm_output_watchdog_timer_stop(&output->wdt);

Hmm, yeah, this is probably safe. The vblank path is not used except
with overlays, and overlays are waiting for a complete rewrite with
nuclear pageflip. Very good.

> +
>  	if (!output->page_flip_pending) {
>  		ts.tv_sec = sec;
>  		ts.tv_nsec = usec * 1000;
>  		weston_output_finish_frame(&output->base, &ts);
>  	}
>  }
>  
>  static void
>  drm_output_destroy(struct weston_output *output_base);
>  
> @@ -774,20 +863,24 @@ page_flip_handler(int fd, unsigned int frame,
>  	 * we just want to page flip to the current buffer to get an accurate
>  	 * timestamp */
>  	if (output->page_flip_pending) {
>  		drm_output_release_fb(output, output->current);
>  		output->current = output->next;
>  		output->next = NULL;
>  	}
>  
>  	output->page_flip_pending = 0;
>  
> +	/* Stop the watchdog instead of patting it here */
> +	if (output->wdt.timer)
> +		drm_output_watchdog_timer_stop(&output->wdt);
> +
>  	if (output->destroy_pending)
>  		drm_output_destroy(&output->base);
>  	else if (!output->vblank_pending) {
>  		ts.tv_sec = sec;
>  		ts.tv_nsec = usec * 1000;
>  		weston_output_finish_frame(&output->base, &ts);
>  
>  		/* We can't call this from frame_notify, because the output's
>  		 * repaint needed flag is cleared just after that */
>  		if (output->recorder)
> @@ -1169,20 +1262,22 @@ drm_output_destroy(struct weston_output *output_base)
>  	c->crtc_allocator &= ~(1 << output->crtc_id);
>  	c->connector_allocator &= ~(1 << output->connector_id);
>  
>  	if (c->use_pixman) {
>  		drm_output_fini_pixman(output);
>  	} else {
>  		gl_renderer->output_destroy(output_base);
>  		gbm_surface_destroy(output->surface);
>  	}
>  
> +	drm_output_watchdog_timer_destroy(&output->wdt);
> +
>  	weston_plane_release(&output->fb_plane);
>  	weston_plane_release(&output->cursor_plane);
>  
>  	weston_output_destroy(&output->base);
>  
>  	free(output);
>  }
>  
>  static struct drm_mode *
>  choose_mode (struct drm_output *output, struct weston_mode *target_mode)
> @@ -2006,20 +2101,21 @@ create_output_for_connector(struct drm_compositor *ec,
>  					ec->format,
>  					&output->format) == -1)
>  		output->format = ec->format;
>  
>  	weston_config_section_get_string(section, "seat", &s, "");
>  	setup_output_seat_constraint(ec, &output->base, s);
>  	free(s);
>  
>  	output->crtc_id = resources->crtcs[i];
>  	output->pipe = i;
> +
>  	ec->crtc_allocator |= (1 << output->crtc_id);
>  	output->connector_id = connector->connector_id;
>  	ec->connector_allocator |= (1 << output->connector_id);
>  
>  	output->original_crtc = drmModeGetCrtc(ec->drm.fd, output->crtc_id);
>  	output->dpms_prop = drm_get_prop(ec->drm.fd, connector, "DPMS");
>  
>  	/* Get the current mode on the crtc that's currently driving
>  	 * this connector. */
>  	encoder = drmModeGetEncoder(ec->drm.fd, connector->encoder_id);
> @@ -2094,20 +2190,25 @@ create_output_for_connector(struct drm_compositor *ec,
>  		weston_log("no available modes for %s\n", output->base.name);
>  		goto err_free;
>  	}
>  
>  	output->base.current_mode->flags |= WL_OUTPUT_MODE_CURRENT;
>  
>  	weston_output_init(&output->base, &ec->base, x, y,
>  			   connector->mmWidth, connector->mmHeight,
>  			   transform, scale);
>  
> +	output->wdt.timer = NULL;
> +	if (ec->watchdog_timer_timeout) {
> +		drm_output_watchdog_timer_create(output);
> +	}
> +
>  	if (ec->use_pixman) {
>  		if (drm_output_init_pixman(output, ec) < 0) {
>  			weston_log("Failed to init output pixman state\n");
>  			goto err_output;
>  		}
>  	} else if (drm_output_init_egl(output, ec) < 0) {
>  		weston_log("Failed to init output gl state\n");
>  		goto err_output;
>  	}
>  
> @@ -2779,20 +2880,24 @@ drm_compositor_create(struct wl_display *display,
>  	/* KMS support for sprites is not complete yet, so disable the
>  	 * functionality for now. */
>  	ec->sprites_are_broken = 1;
>  
>  	section = weston_config_get_section(config, "core", NULL, NULL);
>  	if (get_gbm_format_from_section(section,
>  					GBM_FORMAT_XRGB8888,
>  					&ec->format) == -1)
>  		goto err_base;
>  
> +	section = weston_config_get_section(config, "compositor-drm", NULL, NULL);
> +	weston_config_section_get_uint(section, "watchdog-timer-timeout",
> +									 &ec->watchdog_timer_timeout, 0);

Our tabs are length 8. ;-)

> +
>  	ec->use_pixman = param->use_pixman;
>  
>  	if (weston_compositor_init(&ec->base, display, argc, argv,
>  				   config) < 0) {
>  		weston_log("%s failed\n", __func__);
>  		goto err_base;
>  	}
>  
>  	/* Check if we run drm-backend using weston-launch */
>  	ec->base.launcher = weston_launcher_connect(&ec->base, param->tty,

Thanks,
pq


More information about the wayland-devel mailing list