[Intel-gfx] [PATCH v3] drm/i915: Use new i915_gem_object_pin_map for LRC

Tue Apr 12 13:19:54 UTC 2016

On 12/04/16 14:12, Chris Wilson wrote:
> On Tue, Apr 12, 2016 at 02:05:05PM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> We can use the new pin/lazy unpin API for simplicity
>> and more performance in the execlist submission paths.
>>
>> v2:
>>    * Fix error handling and convert more users.
>>    * Compact some names for readability.
>>
>> v3:
>>    * intel_lr_context_free was not unpinning.
>>    * Special case for GPU reset which otherwise unbalances
>>      the HWS object pages pin count by running the engine
>>      initialization only (not destructors).
>
> Ah! Light dawns...
>
> Should we not just separate out the hws setup and hws hw_init?

Okay...

>> -static void lrc_setup_hardware_status_page(struct intel_engine_cs *engine,
>> -					   struct drm_i915_gem_object *default_ctx_obj)
>> +static int
>> +lrc_setup_hws(struct intel_engine_cs *engine,
>> +	      struct drm_i915_gem_object *def_ctx_obj)
>>   {
>>   	struct drm_i915_private *dev_priv = engine->dev->dev_private;
>> -	struct page *page;
>> +	void *hws;
>>
>>   	/* The HWSP is part of the default context object in LRC mode. */
>> -	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
>> -			+ LRC_PPHWSP_PN * PAGE_SIZE;
>> -	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
>> -	engine->status_page.page_addr = kmap(page);
>> -	engine->status_page.obj = default_ctx_obj;
>> +	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(def_ctx_obj) +
>> +				       LRC_PPHWSP_PN * PAGE_SIZE;
>> +	hws = i915_gem_object_pin_map(def_ctx_obj);
>> +	if (IS_ERR(hws))
>> +		return PTR_ERR(hws);
>> +	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
>> +	engine->status_page.obj = def_ctx_obj;

... so above here is setup and below is init, correct?

>>   	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
>> -			(u32)engine->status_page.gfx_addr);
>> +		   (u32)engine->status_page.gfx_addr);
>>   	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
>> +
>> +	return 0;
>>   }
>>   /**
>> @@ -2688,10 +2700,9 @@ error_deref_obj:
>>   	return ret;
>>   }
>>
>> -void intel_lr_context_reset(struct drm_device *dev,
>> -			struct intel_context *ctx)
>> +void intel_lr_context_reset(struct drm_i915_private *dev_priv,
>> +			    struct intel_context *ctx)
>>   {
>> -	struct drm_i915_private *dev_priv = dev->dev_private;
>>   	struct intel_engine_cs *engine;
>>
>>   	for_each_engine(engine, dev_priv) {
>> @@ -2699,23 +2710,31 @@ void intel_lr_context_reset(struct drm_device *dev,
>>   				ctx->engine[engine->id].state;
>>   		struct intel_ringbuffer *ringbuf =
>>   				ctx->engine[engine->id].ringbuf;
>> +		void *obj_addr;
>>   		uint32_t *reg_state;
>> -		struct page *page;
>>
>>   		if (!ctx_obj)
>>   			continue;
>>
>> -		if (i915_gem_object_get_pages(ctx_obj)) {
>> -			WARN(1, "Failed get_pages for context obj\n");
>> +		obj_addr = i915_gem_object_pin_map(ctx_obj);
>> +		if (WARN_ON(IS_ERR(obj_addr)))
>>   			continue;
>> -		}
>> -		page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
>> -		reg_state = kmap_atomic(page);
>> +
>> +		reg_state = obj_addr + LRC_STATE_PN * PAGE_SIZE;
>> +		ctx_obj->dirty = true;
>>
>>   		reg_state[CTX_RING_HEAD+1] = 0;
>>   		reg_state[CTX_RING_TAIL+1] = 0;
>>
>> -		kunmap_atomic(reg_state);
>> +		i915_gem_object_unpin_map(ctx_obj);
>> +
>> +		/*
>> +		 * Kernel context will pin_map the HWS after reset so we
>> +		 * have to drop the pin count here in order ->pages_pin_count
>> +		 * remains balanced.
>> +		 */
>> +		if (ctx == dev_priv->kernel_context)
>> +			i915_gem_object_unpin_map(engine->status_page.obj);
>
> Then we wouldn't have this kludge. That's going to be worth the time and
> we can then split out the bug fix for the current code.

Yes should not be adding more mines to the minefield. :)

Regards,

Tvrtko