[Intel-gfx] [PATCH 1/4] drm/i915: Suppress redundant syncs with mmio page flips

Fri Sep 19 22:30:56 CEST 2014

On Fri, Sep 19, 2014 at 08:58:58PM +0300, Ville Syrjälä wrote:
> On Wed, Jun 18, 2014 at 12:23:14PM +0100, Chris Wilson wrote:
> > Since mmio-flips do not occur on the suggested ring, we are introducing
> > an extra sync operation where none is required. Pass the current
> > obj->ring, which is what mmio flip will use, to pin_to_display_plane so
> > that we emit the appropriate synchronisation (none).
> > 
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> 
> I had a vague recollection that I had seen something like this and now I
> found it...
> 
> The patch could use a rebase I think, and perhaps it could use a boolean
> to avoid duplicating the buffer pinning in two place? I was thinking
> something like:
> 
> ring = whatever;
> bool mmio_flip = use_mmio_flip(ring);
> if (mmio_flip)
> 	ring = obj->ring;
> pin_and_fence(ring);
> ...
> if (mmio_flip)
> 	...
> else
> 	...

This is the final version:

        if (use_mmio_flip(engine, obj, page_flip_flags)) {
                rq = i915_request_get(obj->last_write.request);

                ret = intel_pin_and_fence_fb_obj(dev, obj, rq);
                if (ret)
                        goto cleanup_rq;

                work->gtt_offset =
                        i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;

                ret = intel_queue_mmio_flip(intel_crtc, rq);
                if (ret)
                        goto cleanup_unpin;
        } else {
                struct intel_context *ctx = engine->default_context;
                if (obj->last_write.request)
                        ctx = obj->last_write.request->ctx;
                rq = intel_engine_alloc_request(engine, ctx);
                if (IS_ERR(rq)) {
                        ret = PTR_ERR(rq);
                        goto cleanup_pending;
                }

                ret = intel_pin_and_fence_fb_obj(dev, obj, rq);
                if (ret)
                        goto cleanup_rq;

                work->gtt_offset =
                        i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;

                ret = dev_priv->display.queue_flip(rq, intel_crtc, fb, obj,
                                                   page_flip_flags);
                if (ret)
                        goto cleanup_unpin;

                intel_mark_page_flip_active(intel_crtc);

                ret = i915_request_commit(rq);
                if (ret)
                        goto cleanup_unpin;
        }

which your suggestion would make

	do_mmio = use_mmio_flip(engine, obj, page_flip_flags);
	if (do_mmio) {
                rq = i915_request_get(obj->last_write.request);
	} else {
                struct intel_context *ctx = engine->default_context;
                if (obj->last_write.request)
                        ctx = obj->last_write.request->ctx;
                rq = intel_engine_alloc_request(engine, ctx);
                if (IS_ERR(rq)) {
                        ret = PTR_ERR(rq);
                        goto cleanup_pending;
                }
	]

	ret = intel_pin_and_fence_fb_obj(dev, obj, rq);
	if (ret)
		goto cleanup_rq;

	work->gtt_offset =
		i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;

	if (do_mmio) {
                ret = intel_queue_mmio_flip(intel_crtc, rq);
        } else {
                ret = dev_priv->display.queue_flip(rq, intel_crtc, fb, obj,
                                                   page_flip_flags);
                if (ret == 0) {
                	intel_mark_page_flip_active(intel_crtc);
                	ret = i915_request_commit(rq);
		}
        }
	if (ret)
		goto cleanup_unpin;

Here, I definitely want to keep the allocated request tied to the
i915_request_commit(), so I favour the duplication in this case.

Unless I push the queue_flip tail down again, i.e.

	if (do_mmio)
		ret = intel_queue_mmio_flip(intel_crtc, rq);
	else
		ret = intel_queue_page_flip(rq, intel_crtc, fb, obj,
					    page_flip_flags);
	if (ret)
		goto cleanup_unpin;

or push do_mmio into page_flip_flags and have a single function here?

	page_flip_flags |= use_mmio_flip(engine, obj, page_flip_flags);

	rq = intel_page_flip_alloc_request(engine, obj, page_flip_flags);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto cleanup_pending;
	}

	ret = intel_pin_and_fence_fb_obj(dev, obj, rq);
	if (ret)
		goto cleanup_rq;

	work->gtt_offset =
		i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;

	ret = intel_page_flip_queue(rq, intel_crtc, fb, obj, page_flip_flags);
	if (ret)
		goto cleanup_unpin;

Maybe, just now worrying about hiding too much magic behind the scenes.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre