[PATCH weston 3/3] gl-renderer: Emit GPU rendering begin and end timeline timepoints

Fri Sep 15 11:39:56 UTC 2017

On Thu, 14 Sep 2017 14:07:35 +0300
Alexandros Frantzis <alexandros.frantzis at collabora.com> wrote:

> Use EGL fence sync objects to emit timepoints for the beginning and the
> end of rendering on the GPU. The timepoints are emitted asynchronously
> using the sync file fds associated with the fence sync objects. The sync
> file fds are acquired using the facilities provided by the
> EGL_ANDROID_native_fence_sync extension.
> 
> If timelining is inactive or the required EGL extensions are not
> present, then GPU timepoint processing and emission are skipped.
> ---
>  libweston/gl-renderer.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 106 insertions(+)

Hi,

I am not familiar yet with the fence/sync APIs so I cannot review their
usage at this time. I do review all the Weston parts.

> 
> diff --git a/libweston/gl-renderer.c b/libweston/gl-renderer.c
> index c2e88a65..e4726d96 100644
> --- a/libweston/gl-renderer.c
> +++ b/libweston/gl-renderer.c
> @@ -38,8 +38,13 @@
>  #include <float.h>
>  #include <assert.h>
>  #include <linux/input.h>
> +#include <linux/sync_file.h>

How old is linux/sync_file.h?

Would we need configure.ac check for its existence?

I think we would need checks for it and make it optional, because my
Gentoo system has no such header installed. Linux-headers package seems
to be version 4.4 here.

Or should we have a fallback definition?

> +#include <sys/ioctl.h>
> +#include <unistd.h>
>  #include <drm_fourcc.h>
>  
> +#include "timeline.h"
> +
>  #include "gl-renderer.h"
>  #include "vertex-clipping.h"
>  #include "linux-dmabuf.h"
> @@ -47,6 +52,7 @@
>  
>  #include "shared/helpers.h"
>  #include "shared/platform.h"
> +#include "shared/timespec-util.h"
>  #include "weston-egl-ext.h"
>  
>  struct gl_shader {
> @@ -238,6 +244,12 @@ struct gl_renderer {
>  	PFNEGLDUPNATIVEFENCEFDANDROIDPROC dup_native_fence_fd;
>  };
>  
> +struct timeline_render {
> +	int end;

Rather than (effectively) 'bool is_end', this could be

	enum {
		RENDER_GPU_BEGIN,
		RENDER_GPU_END
	} type;

That would make the code a little more self-documenting.

> +	struct weston_output *output;
> +	struct wl_event_source *event_source;
> +};
> +
>  static PFNEGLGETPLATFORMDISPLAYEXTPROC get_platform_display = NULL;
>  
>  static inline const char *
> @@ -274,6 +286,90 @@ get_renderer(struct weston_compositor *ec)
>  	return (struct gl_renderer *)ec->renderer;
>  }
>  
> +static uint64_t
> +read_fence_sync_fd_timestamp(int fd)
> +{
> +	struct sync_file_info file_info = { 0 };
> +	struct sync_fence_info fence_info = { 0 };
> +
> +	file_info.sync_fence_info = (uint64_t)&fence_info;

Shouldn't casts between pointers and uints go through uintptr_t?

> +	file_info.num_fences = 1;
> +
> +	if (ioctl(fd, SYNC_IOC_FILE_INFO, &file_info) < 0)
> +		return 0;
> +
> +	return fence_info.timestamp_ns;
> +}
> +
> +static EGLSyncKHR
> +timeline_fence_sync(struct gl_renderer *gr)
> +{
> +	static const EGLint attribs[] = { EGL_NONE };
> +
> +	if (!weston_timeline_enabled_ || !gr->has_native_fence_sync)
> +		return EGL_NO_SYNC_KHR;
> +
> +	return gr->create_sync(gr->egl_display, EGL_SYNC_NATIVE_FENCE_ANDROID,
> +			       attribs);
> +}
> +
> +static int
> +timeline_render_handler(int fd, uint32_t mask, void *data)
> +{
> +	struct timeline_render *tr = (struct timeline_render*)data;

Unnecessary cast.

> +	const char *tp_name = tr->end ? "renderer_gpu_end" : "renderer_gpu_begin";
> +
> +	if (mask & WL_EVENT_READABLE) {
> +		uint64_t ts = read_fence_sync_fd_timestamp(fd);
> +		struct timespec tspec = { 0 };
> +
> +		timespec_add_nsec(&tspec, &tspec, ts);
> +
> +		TL_POINT(tp_name, TLP_GPU(&tspec), TLP_OUTPUT(tr->output), TLP_END);

What do we know about the clock domain / time base for the GPU
timestamps?

I did not see anywhere a check that the graphics clock domain we have
chosen (weston_compositor::presentation_clock) is the same as the GPU
timestamp domain.

I believe there should be a check, and if the domains do not match,
print a message to the log saying they don't match and disable the GPU
timestamping.

> +	}
> +
> +	wl_event_source_remove(tr->event_source);
> +	close(fd);
> +
> +	free(data);
> +
> +	return 0;
> +}
> +
> +static void
> +timeline_submit_render(struct gl_renderer *gr, struct weston_compositor *ec,
> +		       struct weston_output *output,
> +		       EGLSyncKHR begin_sync, EGLSyncKHR end_sync)
> +{
> +	struct wl_event_loop *loop;
> +	int begin_fd, end_fd;
> +	struct timeline_render *begin_tr, *end_tr;
> +
> +	if (!weston_timeline_enabled_ || !gr->has_native_fence_sync)
> +		return;
> +
> +	loop = wl_display_get_event_loop(ec->wl_display);
> +	begin_fd = gr->dup_native_fence_fd(gr->egl_display, begin_sync);
> +	end_fd = gr->dup_native_fence_fd(gr->egl_display, end_sync);

Failure checks?

> +
> +	begin_tr = zalloc(sizeof *begin_tr);
> +	begin_tr->end = 0;
> +	begin_tr->output = output;
> +	begin_tr->event_source =
> +		wl_event_loop_add_fd(loop, begin_fd, WL_EVENT_READABLE,
> +				     timeline_render_handler, begin_tr);

Failure check?

> +
> +	end_tr = zalloc(sizeof *end_tr);
> +	end_tr->end = 1;
> +	end_tr->output = output;
> +	end_tr->event_source =
> +		wl_event_loop_add_fd(loop, end_fd, WL_EVENT_READABLE,
> +				     timeline_render_handler, end_tr);
> +
> +	gr->destroy_sync(gr->egl_display, begin_sync);
> +	gr->destroy_sync(gr->egl_display, end_sync);

I think there is a slight danger of leaking the timeline_render objects
on e.g. compositor exit when we stop the event loop, maybe?

I'd like to be defensive here and collect them in a list in e.g. struct
gl_output_state. That way we could clean them up and print warnings if
some remain on e.g. output destruction. It's not completely obvious
whether our repaint machinery guarantees that the output cannot
disappear while there are pending sync events either. Normally,
whenever one stores a pointer to a weston_output, one also needs a
destroy listener to avoid a stale pointer. But in this case hooking up
a sanity check in the output destruction instead should be fine.

> +}
> +
>  static struct egl_image*
>  egl_image_create(struct gl_renderer *gr, EGLenum target,
>  		 EGLClientBuffer buffer, const EGLint *attribs)
> @@ -1106,10 +1202,13 @@ gl_renderer_repaint_output(struct weston_output *output,
>  	pixman_box32_t *rects;
>  	pixman_region32_t buffer_damage, total_damage;
>  	enum gl_border_status border_damage = BORDER_STATUS_CLEAN;
> +	EGLSyncKHR begin_render_sync, end_render_sync;
>  
>  	if (use_output(output) < 0)
>  		return;
>  
> +	begin_render_sync = timeline_fence_sync(gr);
> +
>  	/* Calculate the viewport */
>  	glViewport(go->borders[GL_RENDERER_BORDER_LEFT].width,
>  		   go->borders[GL_RENDERER_BORDER_BOTTOM].height,
> @@ -1158,6 +1257,8 @@ gl_renderer_repaint_output(struct weston_output *output,
>  	pixman_region32_copy(&output->previous_damage, output_damage);
>  	wl_signal_emit(&output->frame_signal, output);
>  
> +	end_render_sync = timeline_fence_sync(gr);
> +
>  	if (gr->swap_buffers_with_damage) {
>  		pixman_region32_init(&buffer_damage);
>  		weston_transformed_region(output->width, output->height,
> @@ -1203,6 +1304,11 @@ gl_renderer_repaint_output(struct weston_output *output,
>  	}
>  
>  	go->border_status = BORDER_STATUS_CLEAN;
> +
> +	/* We have to submit the fence sync objects after swap buffers, since
> +	 * the objects get assigned a valid sync file fd only after a gl flush.
> +	 */
> +	timeline_submit_render(gr, compositor, output, begin_render_sync, end_render_sync);

Split long line.

>  }
>  
>  static int

The approach looks good to me. Just a bit more polishing on the code
needed.

Thanks,
pq
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/wayland-devel/attachments/20170915/1d62fca9/attachment.sig>