[Intel-gfx] [PATCH 5/5] drm/i915: Wait for object idle without locks in atomic_commit.
Maarten Lankhorst
maarten.lankhorst at linux.intel.com
Mon Nov 2 05:13:48 PST 2015
On 29-10-15 at 01:30, Matt Roper wrote:
> On Wed, Sep 23, 2015 at 01:27:12PM +0200, Maarten Lankhorst wrote:
>> Make pinning and waiting a separate step, and wait for object idle
>> without struct_mutex held.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 2 -
>> drivers/gpu/drm/i915/i915_gem.c | 6 ---
>> drivers/gpu/drm/i915/intel_atomic_plane.c | 2 +
>> drivers/gpu/drm/i915/intel_display.c | 86 ++++++++++++++++++++++++++-----
>> drivers/gpu/drm/i915/intel_drv.h | 7 +--
>> drivers/gpu/drm/i915/intel_fbdev.c | 2 +-
>> drivers/gpu/drm/i915/intel_overlay.c | 6 ++-
>> 7 files changed, 84 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 3bf8a9b771d0..ec72fd457499 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2982,8 +2982,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
>> int __must_check
>> i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>> u32 alignment,
>> - struct intel_engine_cs *pipelined,
>> - struct drm_i915_gem_request **pipelined_request,
>> const struct i915_ggtt_view *view);
>> void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
>> const struct i915_ggtt_view *view);
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 46f0e83ee6ee..ab02182c47a5 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -3782,17 +3782,11 @@ unlock:
>> int
>> i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>> u32 alignment,
>> - struct intel_engine_cs *pipelined,
>> - struct drm_i915_gem_request **pipelined_request,
>> const struct i915_ggtt_view *view)
>> {
>> u32 old_read_domains, old_write_domain;
>> int ret;
>>
>> - ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
>> - if (ret)
>> - return ret;
>> -
>> /* Mark the pin_display early so that we account for the
>> * display coherency whilst setting up the cache domains.
>> */
>> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
>> index a11980696595..c6bb0fc1edfb 100644
>> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
>> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
>> @@ -84,6 +84,7 @@ intel_plane_duplicate_state(struct drm_plane *plane)
>> state = &intel_state->base;
>>
>> __drm_atomic_helper_plane_duplicate_state(plane, state);
>> + intel_state->wait_req = NULL;
>>
>> return state;
>> }
>> @@ -100,6 +101,7 @@ void
>> intel_plane_destroy_state(struct drm_plane *plane,
>> struct drm_plane_state *state)
>> {
>> + WARN_ON(state && to_intel_plane_state(state)->wait_req);
>> drm_atomic_helper_plane_destroy_state(plane, state);
>> }
>>
>> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>> index 2f046134cc9a..d817c44ee428 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -2291,9 +2291,7 @@ static unsigned int intel_linear_alignment(struct drm_i915_private *dev_priv)
>> int
>> intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>> struct drm_framebuffer *fb,
>> - const struct drm_plane_state *plane_state,
>> - struct intel_engine_cs *pipelined,
>> - struct drm_i915_gem_request **pipelined_request)
>> + const struct drm_plane_state *plane_state)
>> {
>> struct drm_device *dev = fb->dev;
>> struct drm_i915_private *dev_priv = dev->dev_private;
>> @@ -2349,8 +2347,8 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>> */
>> intel_runtime_pm_get(dev_priv);
>>
>> - ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
>> - pipelined_request, &view);
>> + ret = i915_gem_object_pin_to_display_plane(obj, alignment,
>> + &view);
> The pin to display plane (and hence the wait) happens inside
> intel_runtime_pm_get/put() in the current code. When you pull the wait
> out to the various callsites, it isn't holding runtime pm anymore (at
> least not in a way that's obvious to me). Can this be a problem?
> Neither runtime PM nor GEM internals are something I'm terribly familiar
> with, so you might want to get an ack from someone like Paulo or Chris?
I don't think the GPU can be runtime suspended while it still has requests outstanding, so waiting for those requests without holding a wakeref should be fine; the pinning itself still happens with runtime pm held. But an ack from Paulo or Chris can't hurt.
>
>> if (ret)
>> goto err_pm;
>>
>> @@ -11357,9 +11355,14 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>> * synchronisation, so all we want here is to pin the framebuffer
>> * into the display plane and skip any waits.
>> */
>> + if (!mmio_flip) {
>> + ret = i915_gem_object_sync(obj, ring, &request);
>> + if (ret)
>> + goto cleanup_pending;
>> + }
>> +
>> ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
>> - crtc->primary->state,
>> - mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request);
>> + crtc->primary->state);
> Probably a dumb question from someone who isn't familiar with GEM
> handling...the comment above the lines you added here says we just want
> to pin and skip any waits when doing MMIO flips (which matches the logic
> of your code change). However it looks to me like the original code was
> still doing a wait, just with a potentially different ring parameter.
> What am I missing here?
A layer of obfuscation.
When doing mmio flips the old code passed i915_gem_request_get_ring(obj->last_write_req), because passing NULL there would have resulted in a CPU wait.
If it ever did wait with mmio flips that was a bug, because intel_mmio_flip_work_func does the waiting for mmio flips.
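Roughly, the pre-patch path boiled down to this (a simplified sketch of the code removed from i915_gem_object_pin_to_display_plane, not the literal source):

        /* pre-patch, at the top of i915_gem_object_pin_to_display_plane() */
        ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
        /*
         * pipelined == NULL: blocking CPU wait until obj is idle.
         * pipelined == ring of obj->last_write_req: there is nothing to
         * sync against, so the call is (mostly) a no-op and no CPU wait.
         */
        if (ret)
                return ret;

So passing i915_gem_request_get_ring(obj->last_write_req) for mmio flips was only there to keep that call from turning into a CPU wait; the real waiting for mmio flips happens in intel_mmio_flip_work_func.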
>> if (ret)
>> goto cleanup_pending;
>>
>> @@ -12993,7 +12996,10 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>> struct drm_atomic_state *state,
>> bool async)
>> {
>> + struct drm_i915_private *dev_priv = dev->dev_private;
>> + struct drm_plane_state *plane_state;
>> struct drm_crtc_state *crtc_state;
>> + struct drm_plane *plane;
>> struct drm_crtc *crtc;
>> int i, ret;
>>
>> @@ -13006,6 +13012,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>> ret = intel_crtc_wait_for_pending_flips(crtc);
>> if (ret)
>> return ret;
>> +
>> + if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2)
>> + flush_workqueue(dev_priv->wq);
>> }
>>
>> ret = i915_mutex_lock_interruptible(dev);
>> @@ -13013,6 +13022,37 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>> return ret;
>>
>> ret = drm_atomic_helper_prepare_planes(dev, state);
>> + if (!ret && !async) {
>> + u32 reset_counter;
>> +
>> + reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>> + mutex_unlock(&dev->struct_mutex);
>> +
>> + for_each_plane_in_state(state, plane, plane_state, i) {
>> + struct intel_plane_state *intel_plane_state =
>> + to_intel_plane_state(plane_state);
>> +
>> + if (!intel_plane_state->wait_req)
>> + continue;
>> +
>> + ret = __i915_wait_request(intel_plane_state->wait_req,
>> + reset_counter, true,
>> + NULL, NULL);
>> +
>> + /* Swallow -EIO errors to allow updates during hw lockup. */
>> + if (ret == -EIO)
>> + ret = 0;
>> +
>> + if (ret)
>> + break;
>> + }
>> +
>> + if (!ret)
>> + return 0;
>> +
>> + mutex_lock(&dev->struct_mutex);
>> + drm_atomic_helper_cleanup_planes(dev, state);
>> + }
>>
>> mutex_unlock(&dev->struct_mutex);
>> return ret;
>> @@ -13039,15 +13079,17 @@ static int intel_atomic_commit(struct drm_device *dev,
>> bool async)
>> {
>> struct drm_i915_private *dev_priv = dev->dev_private;
>> - struct drm_crtc *crtc;
>> struct drm_crtc_state *crtc_state;
>> + struct drm_crtc *crtc;
>> int ret = 0;
>> int i;
>> bool any_ms = false;
>>
>> ret = intel_atomic_prepare_commit(dev, state, async);
>> - if (ret)
>> + if (ret) {
>> + DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
>> return ret;
>> + }
>>
>> drm_atomic_helper_swap_state(dev, state);
>>
>> @@ -13318,7 +13360,8 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>> * This should only fail upon a hung GPU, in which case we
>> * can safely continue.
>> */
>> - if (needs_modeset(crtc_state))
>> + if (needs_modeset(crtc_state) &&
>> + to_intel_plane_state(plane->state)->visible)
>> ret = i915_gem_object_wait_rendering(old_obj, true);
>>
>> /* Swallow -EIO errors to allow updates during hw lockup. */
>> @@ -13335,11 +13378,21 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>> if (ret)
>> DRM_DEBUG_KMS("failed to attach phys object\n");
>> } else {
>> - ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL);
>> + ret = intel_pin_and_fence_fb_obj(plane, fb, new_state);
>> }
>>
>> - if (ret == 0)
>> + if (ret == 0) {
>> + if (obj && obj->last_write_req &&
>> + !i915_gem_request_completed(obj->last_write_req, true)) {
>> + struct intel_plane_state *plane_state =
>> + to_intel_plane_state(new_state);
>> +
>> + i915_gem_request_assign(&plane_state->wait_req,
>> + obj->last_write_req);
>> + }
>> +
>> i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
>> + }
>>
>> return ret;
>> }
>> @@ -13357,9 +13410,12 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
>> {
>> struct drm_device *dev = plane->dev;
>> struct intel_plane *intel_plane = to_intel_plane(plane);
>> + struct intel_plane_state *old_intel_state;
>> struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
>> struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
>>
>> + old_intel_state = to_intel_plane_state(old_state);
>> +
>> if (!obj && !old_obj)
>> return;
>>
>> @@ -13371,6 +13427,9 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
>> if ((old_obj && (old_obj->frontbuffer_bits & intel_plane->frontbuffer_bit)) ||
>> (obj && !(obj->frontbuffer_bits & intel_plane->frontbuffer_bit)))
>> i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
>> +
>> + i915_gem_request_assign(&old_intel_state->wait_req, NULL);
>> +
>> }
>>
>> int
>> @@ -15348,8 +15407,7 @@ void intel_modeset_gem_init(struct drm_device *dev)
>> mutex_lock(&dev->struct_mutex);
>> ret = intel_pin_and_fence_fb_obj(c->primary,
>> c->primary->fb,
>> - c->primary->state,
>> - NULL, NULL);
>> + c->primary->state);
>> mutex_unlock(&dev->struct_mutex);
>> if (ret) {
>> DRM_ERROR("failed to pin boot fb on pipe %d\n",
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index cfdb0f2714cd..852a0d192f82 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -270,6 +270,9 @@ struct intel_plane_state {
>> int scaler_id;
>>
>> struct drm_intel_sprite_colorkey ckey;
>> +
>> + /* async flip related structures */
>> + struct drm_i915_gem_request *wait_req;
>> };
>>
>> struct intel_initial_plane_config {
>> @@ -1055,9 +1058,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
>> struct drm_modeset_acquire_ctx *ctx);
>> int intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>> struct drm_framebuffer *fb,
>> - const struct drm_plane_state *plane_state,
>> - struct intel_engine_cs *pipelined,
>> - struct drm_i915_gem_request **pipelined_request);
>> + const struct drm_plane_state *plane_state);
>> struct drm_framebuffer *
>> __intel_framebuffer_create(struct drm_device *dev,
>> struct drm_mode_fb_cmd2 *mode_cmd,
>> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
>> index 65329127f0b9..51afee61ba7e 100644
>> --- a/drivers/gpu/drm/i915/intel_fbdev.c
>> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
>> @@ -155,7 +155,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>> }
>>
>> /* Flush everything out, we'll be doing GTT only from now on */
>> - ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
>> + ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL);
>> if (ret) {
>> DRM_ERROR("failed to pin obj: %d\n", ret);
>> goto out_fb;
>> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
>> index 444542696a2c..1b18cc6bdbd6 100644
>> --- a/drivers/gpu/drm/i915/intel_overlay.c
>> +++ b/drivers/gpu/drm/i915/intel_overlay.c
>> @@ -749,7 +749,11 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>> if (ret != 0)
>> return ret;
>>
>> - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL,
>> + ret = i915_gem_object_wait_rendering(new_bo, true);
> Again, I'm not super familiar with GEM internals...can this be a
> behavior change from the previous code? Originally the pin_to_display
> plane function would have passed (obj->base.pending_write_domain == 0)
> as the second parameter here (readonly), but you're unconditionally
> passing true. Can there not be pending writes against this object?
I don't think it matters much in the case of overlays. But maybe I should
just replace it with a call to i915_gem_object_sync and wait for the object to go fully idle.
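Something along these lines, as an untested sketch of what I mean for that call site in intel_overlay_do_put_image:

        /* wait on new_bo before it is shown on the overlay; with a
         * NULL target ring this degenerates into a blocking CPU wait,
         * which is what we want here */
        ret = i915_gem_object_sync(new_bo, NULL, NULL);
        if (ret != 0)
                return ret;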
> Overall I feel like this patch goes a bit too far outside my area of
> expertise for me to serve as the final reviewer for it. Maybe it would
> be good to have someone like Chris or Ville take a look since they have
> a lot more experience in this area.
>
Slightly outside my area too. :)
~Maarten