[Intel-gfx] [PATCH 4/4] drm/i915: Get rid of acthd based guilty batch search

Ben Widawsky ben at bwidawsk.net
Wed Jan 29 22:44:34 CET 2014


On Wed, Jan 29, 2014 at 05:05:39PM +0200, Mika Kuoppala wrote:
> As we seek the guilty batch using request and hangcheck
> score, this code is not needed anymore.
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c |   91 ++-------------------------------------
>  1 file changed, 4 insertions(+), 87 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a46a1a7..8637898 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2241,70 +2241,6 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
>  	spin_unlock(&file_priv->mm.lock);
>  }
>  
> -static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj,
> -				    struct i915_address_space *vm)
> -{
> -	if (acthd >= i915_gem_obj_offset(obj, vm) &&
> -	    acthd < i915_gem_obj_offset(obj, vm) + obj->base.size)
> -		return true;
> -
> -	return false;
> -}
> -
> -static bool i915_head_inside_request(const u32 acthd_unmasked,
> -				     const u32 request_start,
> -				     const u32 request_end)
> -{
> -	const u32 acthd = acthd_unmasked & HEAD_ADDR;
> -
> -	if (request_start < request_end) {
> -		if (acthd >= request_start && acthd < request_end)
> -			return true;
> -	} else if (request_start > request_end) {
> -		if (acthd >= request_start || acthd < request_end)
> -			return true;
> -	}
> -
> -	return false;
> -}
> -
> -static struct i915_address_space *
> -request_to_vm(struct drm_i915_gem_request *request)
> -{
> -	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
> -	struct i915_address_space *vm;
> -
> -	if (request->ctx)
> -		vm = request->ctx->vm;
> -	else
> -		vm = &dev_priv->gtt.base;
> -
> -	return vm;
> -}
> -
> -static bool i915_request_guilty(struct drm_i915_gem_request *request,
> -				const u32 acthd, bool *inside)
> -{
> -	/* There is a possibility that unmasked head address
> -	 * pointing inside the ring, matches the batch_obj address range.
> -	 * However this is extremely unlikely.
> -	 */
> -	if (request->batch_obj) {
> -		if (i915_head_inside_object(acthd, request->batch_obj,
> -					    request_to_vm(request))) {
> -			*inside = true;
> -			return true;
> -		}
> -	}
> -
> -	if (i915_head_inside_request(acthd, request->head, request->tail)) {
> -		*inside = false;
> -		return true;
> -	}
> -
> -	return false;
> -}
> -
>  static bool i915_context_is_banned(const struct i915_hw_context *ctx)
>  {
>  	struct drm_i915_private *dev_priv;
> @@ -2329,30 +2265,11 @@ static bool i915_context_is_banned(const struct i915_hw_context *ctx)
>  	return false;
>  }
>  
> -static void i915_set_reset_status(struct intel_ring_buffer *ring,
> -				  struct drm_i915_gem_request *request,
> -				  const bool guilty)
> +static void i915_set_reset_status(struct i915_hw_context *ctx,
> +				 const bool guilty)
>  {
> -	const u32 acthd = intel_ring_get_active_head(ring);
> -	bool inside;
> -	unsigned long offset = 0;
> -	struct i915_hw_context *ctx = request->ctx;
>  	struct i915_ctx_hang_stats *hs;
>  
> -	if (request->batch_obj)
> -		offset = i915_gem_obj_offset(request->batch_obj,
> -					     request_to_vm(request));
> -
> -	if (guilty &&
> -	    i915_request_guilty(request, acthd, &inside)) {
> -		DRM_DEBUG("%s hung %s bo (0x%lx ctx %d) at 0x%x\n",
> -			  ring->name,
> -			  inside ? "inside" : "flushing",
> -			  offset,
> -			  ctx ? ctx->id : 0,
> -			  acthd);
> -	}
> -
>  	if (WARN_ON(!ctx))
>  		return;
>  
> @@ -2407,10 +2324,10 @@ static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
>  
>  	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
>  
> -	i915_set_reset_status(ring, request, ring_hung);
> +	i915_set_reset_status(request->ctx, ring_hung);
>  
>  	list_for_each_entry_continue(request, &ring->request_list, list)
> -		i915_set_reset_status(ring, request, false);
> +		i915_set_reset_status(request->ctx, false);
>  }
>  
>  static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,

I can't say that I'm a huge fan of calling i915_set_reset_status twice
(I bit my lip while reading the last patch). To me it suggests that the
interface probably ended up a bit poorly designed. I can live with it
though, I just couldn't bite my lip for 2 patches in a row :-)

I guess I've missed how this solves the issue I poked about in the
original series. However, the code overall is a big improvement, and
even in the unlikely case that I am not just being blind to your
solution- the odds of having multiple hung rings are slim enough that I
can live with that either way.

I did put some requests in the patches. Each already had an
unconditional r-b. Therefore, the series is:
Reviewed-by: Ben Widawsky <ben at bwidawsk.net>

Last word: As I've discussed with Chris too, I am overall a bit wary of
removing any use upon hardware for doing a lot of these error triage,
detection and collection functions. I really like that no matter how
bonghits our driver gets, we can read certain registers to try to figure
things out. I say this now since I think after this series I will no
longer have a leg to stand on in the, we shouldn't use requests for
error collection, discussion. Thanks for reading my rant.

-- 
Ben Widawsky, Intel Open Source Technology Center



More information about the Intel-gfx mailing list