[Intel-gfx] [RFC] drm/i915/lrc: allocate separate page for HWSP

Michel Thierry michel.thierry at intel.com
Thu Jul 6 18:51:58 UTC 2017


On 7/6/2017 10:59 AM, Chris Wilson wrote:
 > If there are no conflicts, then just skip the patch, and really there
 > shouldn't be since the writes we care about are ordered and the writes
 > we don't, we don't. We will need to move to per-context hwsp in the near
 > future which should alleviate these concerns.

It helped me explain the strange data I was seeing in the HSWP during 
driver load came from (that random data wasn't really random, I was 
looking at the PPHWSP which later became the HWSP).

Just a comment/fix for GuC submission below, because as it is, this will 
break it.

-Michel

On 06/07/17 09:10, Daniele Ceraolo Spurio wrote:
> On gen8+ we're currently using the PPHWSP of the kernel ctx as the
> global HWSP. However, when the kernel ctx gets submitted (e.g. from
> __intel_autoenable_gt_powersave) the HW will use that page as both
> HWSP and PPHWSP. Currently we're not seeing any problem because the
> conflict happens at offsets below 0x30 in an area we don't access,
> but that is not guaranteed to be true for future platform.
> 
> To avoid the conflict, instead of re-using the PPHWSP of the kernel
> ctx we can allocate a separate page for the HWSP like what happens for
> pre-execlists platform.
> 
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com> > ---
>   drivers/gpu/drm/i915/intel_engine_cs.c  | 123 +++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_lrc.c        |  42 +----------
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 125 +-------------------------------
>   3 files changed, 127 insertions(+), 163 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index a55cd72..70e9d88 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -444,6 +444,114 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
>   	i915_vma_unpin_and_release(&engine->scratch);
>   }
>   
> +static void cleanup_phys_status_page(struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +
> +	if (!dev_priv->status_page_dmah)
> +		return;
> +
> +	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> +	engine->status_page.page_addr = NULL;
> +}
> +
> +static void cleanup_status_page(struct intel_engine_cs *engine)
> +{
> +	struct i915_vma *vma;
> +	struct drm_i915_gem_object *obj;
> +
> +	vma = fetch_and_zero(&engine->status_page.vma);
> +	if (!vma)
> +		return;
> +
> +	obj = vma->obj;
> +
> +	i915_vma_unpin(vma);
> +	i915_vma_close(vma);
> +
> +	i915_gem_object_unpin_map(obj);
> +	__i915_gem_object_release_unless_active(obj);
> +}
> +
> +static int init_status_page(struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +	unsigned int flags;
> +	void *vaddr;
> +	int ret;
> +
> +	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
> +	if (IS_ERR(obj)) {
> +		DRM_ERROR("Failed to allocate status page\n");
> +		return PTR_ERR(obj);
> +	}
> +
> +	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +	if (ret)
> +		goto err;
> +
> +	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
> +		goto err;
> +	}
> +
> +	flags = PIN_GLOBAL;
> +	if (!HAS_LLC(engine->i915))
> +		/* On g33, we cannot place HWS above 256MiB, so
> +		 * restrict its pinning to the low mappable arena.
> +		 * Though this restriction is not documented for
> +		 * gen4, gen5, or byt, they also behave similarly
> +		 * and hang if the HWS is placed at the top of the
> +		 * GTT. To generalise, it appears that all !llc
> +		 * platforms have issues with us placing the HWS
> +		 * above the mappable region (even though we never
> +		 * actualy map it).

Chance to fix a typo ("actualy"), and proof that it's really just 
reusing code.

> +		 */
> +		flags |= PIN_MAPPABLE;
> +	ret = i915_vma_pin(vma, 0, 4096, flags);
> +	if (ret)
> +		goto err;

This will break GuC submission, the HWSP will be allocated correctly,

[ ] [drm:intel_engine_init_common [i915]] rcs0 hws offset: 0x00001000
[ ] [drm:intel_engine_init_common [i915]] bcs0 hws offset: 0x00002000
[ ] [drm:intel_engine_init_common [i915]] vcs0 hws offset: 0x00003000
[ ] [drm:intel_engine_init_common [i915]] vcs1 hws offset: 0x00004000
[ ] [drm:intel_engine_init_common [i915]] vecs0 hws offset: 0x00005000

But these are below GUC_WOPCM_TOP (0x80000), and our _beloved_ fw must 
be having problems accessing them, because this causes:

[ ] [drm:intel_guc_init_hw [i915]] GuC fw status: fetch SUCCESS, load 
PENDING
[ ] [drm:guc_ucode_xfer_dma [i915]] DMA status 0x10, GuC status 0x800071ec
[ ] [drm:guc_ucode_xfer_dma [i915]] returning -110

So we must need this restriction before pinning it,

@@ -510,6 +510,11 @@ static int init_status_page(struct intel_engine_cs 
*engine)
                  * actualy map it).
                  */
                 flags |= PIN_MAPPABLE;
+
+       /* GuC can only access pages above GUC_WOPCM_TOP ¯\_(ツ)_/¯ */
+       if (HAS_GUC(engine->i915) && i915.enable_guc_loading)
+               flags |= PIN_OFFSET_BIAS | GUC_WOPCM_TOP;
+
         ret = i915_vma_pin(vma, 0, 4096, flags);
         if (ret)
                 goto err;

With that change, GuC is happy:

[ ] [drm:intel_engine_init_common [i915]] rcs0 hws offset: 0x00084000
[ ] [drm:intel_engine_init_common [i915]] bcs0 hws offset: 0x00089000
[ ] [drm:intel_engine_init_common [i915]] vcs0 hws offset: 0x0008e000
[ ] [drm:intel_engine_init_common [i915]] vcs1 hws offset: 0x00093000
[ ] [drm:intel_engine_init_common [i915]] vecs0 hws offset: 0x00098000
...
[ ] [drm:intel_guc_init_hw [i915]] GuC fw status: fetch SUCCESS, load 
PENDING
[ ] [drm:guc_ucode_xfer_dma [i915]] DMA status 0x10, GuC status 0x8002f0ec
[ ] [drm:guc_ucode_xfer_dma [i915]] returning 0
[ ] [drm] GuC submission enabled (firmware i915/skl_guc_ver9_33.bin 
[version 9.33])


> +
> +	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	if (IS_ERR(vaddr)) {
> +		ret = PTR_ERR(vaddr);
> +		goto err_unpin;
> +	}
> +
> +	engine->status_page.vma = vma;
> +	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> +	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> +
> +	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
> +			 engine->name, i915_ggtt_offset(vma));
> +	return 0;
> +
> +err_unpin:
> +	i915_vma_unpin(vma);
> +err:
> +	i915_gem_object_put(obj);
> +	return ret;
> +}
> +
> +static int init_phys_status_page(struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +
> +	GEM_BUG_ON(engine->id != RCS);
> +
> +	dev_priv->status_page_dmah =
> +		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> +	if (!dev_priv->status_page_dmah)
> +		return -ENOMEM;
> +
> +	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> +	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> +
> +	return 0;
> +}
> +
>   /**
>    * intel_engines_init_common - initialize cengine state which might require hw access
>    * @engine: Engine to initialize.
> @@ -481,8 +589,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>   	if (ret)
>   		goto err_unpin;
>   
> +	if (HWS_NEEDS_PHYSICAL(engine->i915))
> +		ret = init_phys_status_page(engine);
> +	else
> +		ret = init_status_page(engine);
> +	if (ret)
> +		goto err_rs_fini;
> +
>   	return 0;
>   
> +err_rs_fini:
> +	i915_gem_render_state_fini(engine);
> +
>   err_unpin:
>   	engine->context_unpin(engine, engine->i915->kernel_context);
>   	return ret;
> @@ -499,6 +617,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>   {
>   	intel_engine_cleanup_scratch(engine);
>   
> +	if (HWS_NEEDS_PHYSICAL(engine->i915))
> +		cleanup_phys_status_page(engine);
> +	else
> +		cleanup_status_page(engine);
> +
>   	i915_gem_render_state_fini(engine);
>   	intel_engine_fini_breadcrumbs(engine);
>   	intel_engine_cleanup_cmd_parser(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 699868d..d84a36c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1641,11 +1641,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
>   	if (engine->cleanup)
>   		engine->cleanup(engine);
>   
> -	if (engine->status_page.vma) {
> -		i915_gem_object_unpin_map(engine->status_page.vma->obj);
> -		engine->status_page.vma = NULL;
> -	}
> -
>   	intel_engine_cleanup_common(engine);
>   
>   	lrc_destroy_wa_ctx(engine);
> @@ -1692,24 +1687,6 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>   	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
>   }
>   
> -static int
> -lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
> -{
> -	const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
> -	void *hws;
> -
> -	/* The HWSP is part of the default context object in LRC mode. */
> -	hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
> -	if (IS_ERR(hws))
> -		return PTR_ERR(hws);
> -
> -	engine->status_page.page_addr = hws + hws_offset;
> -	engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
> -	engine->status_page.vma = vma;
> -
> -	return 0;
> -}
> -
>   static void
>   logical_ring_setup(struct intel_engine_cs *engine)
>   {
> @@ -1742,27 +1719,14 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>   	logical_ring_default_irqs(engine);
>   }
>   
> -static int
> -logical_ring_init(struct intel_engine_cs *engine)
> +static int logical_ring_init(struct intel_engine_cs *engine)
>   {
> -	struct i915_gem_context *dctx = engine->i915->kernel_context;
> -	int ret;
> +	int ret = 0;
>   
>   	ret = intel_engine_init_common(engine);
>   	if (ret)
> -		goto error;
> -
> -	/* And setup the hardware status page. */
> -	ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
> -	if (ret) {
> -		DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
> -		goto error;
> -	}
> -
> -	return 0;
> +		intel_logical_ring_cleanup(engine);
>   
> -error:
> -	intel_logical_ring_cleanup(engine);
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 5224b7a..b258053 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1174,113 +1174,7 @@ static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
>   	return 0;
>   }
>   
> -static void cleanup_phys_status_page(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	if (!dev_priv->status_page_dmah)
> -		return;
> -
> -	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> -	engine->status_page.page_addr = NULL;
> -}
> -
> -static void cleanup_status_page(struct intel_engine_cs *engine)
> -{
> -	struct i915_vma *vma;
> -	struct drm_i915_gem_object *obj;
> -
> -	vma = fetch_and_zero(&engine->status_page.vma);
> -	if (!vma)
> -		return;
> -
> -	obj = vma->obj;
> -
> -	i915_vma_unpin(vma);
> -	i915_vma_close(vma);
> -
> -	i915_gem_object_unpin_map(obj);
> -	__i915_gem_object_release_unless_active(obj);
> -}
>   
> -static int init_status_page(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_gem_object *obj;
> -	struct i915_vma *vma;
> -	unsigned int flags;
> -	void *vaddr;
> -	int ret;
> -
> -	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
> -	if (IS_ERR(obj)) {
> -		DRM_ERROR("Failed to allocate status page\n");
> -		return PTR_ERR(obj);
> -	}
> -
> -	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> -	if (ret)
> -		goto err;
> -
> -	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
> -	if (IS_ERR(vma)) {
> -		ret = PTR_ERR(vma);
> -		goto err;
> -	}
> -
> -	flags = PIN_GLOBAL;
> -	if (!HAS_LLC(engine->i915))
> -		/* On g33, we cannot place HWS above 256MiB, so
> -		 * restrict its pinning to the low mappable arena.
> -		 * Though this restriction is not documented for
> -		 * gen4, gen5, or byt, they also behave similarly
> -		 * and hang if the HWS is placed at the top of the
> -		 * GTT. To generalise, it appears that all !llc
> -		 * platforms have issues with us placing the HWS
> -		 * above the mappable region (even though we never
> -		 * actualy map it).
> -		 */
> -		flags |= PIN_MAPPABLE;
> -	ret = i915_vma_pin(vma, 0, 4096, flags);
> -	if (ret)
> -		goto err;
> -
> -	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> -	if (IS_ERR(vaddr)) {
> -		ret = PTR_ERR(vaddr);
> -		goto err_unpin;
> -	}
> -
> -	engine->status_page.vma = vma;
> -	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> -	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> -
> -	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
> -			 engine->name, i915_ggtt_offset(vma));
> -	return 0;
> -
> -err_unpin:
> -	i915_vma_unpin(vma);
> -err:
> -	i915_gem_object_put(obj);
> -	return ret;
> -}
> -
> -static int init_phys_status_page(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	GEM_BUG_ON(engine->id != RCS);
> -
> -	dev_priv->status_page_dmah =
> -		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> -	if (!dev_priv->status_page_dmah)
> -		return -ENOMEM;
> -
> -	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> -	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> -
> -	return 0;
> -}
>   
>   int intel_ring_pin(struct intel_ring *ring,
>   		   struct drm_i915_private *i915,
> @@ -1567,17 +1461,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>   	if (err)
>   		goto err;
>   
> -	if (HWS_NEEDS_PHYSICAL(engine->i915))
> -		err = init_phys_status_page(engine);
> -	else
> -		err = init_status_page(engine);
> -	if (err)
> -		goto err;
> -
>   	ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
>   	if (IS_ERR(ring)) {
>   		err = PTR_ERR(ring);
> -		goto err_hws;
> +		goto err;
>   	}
>   
>   	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
> @@ -1592,11 +1479,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>   
>   err_ring:
>   	intel_ring_free(ring);
> -err_hws:
> -	if (HWS_NEEDS_PHYSICAL(engine->i915))
> -		cleanup_phys_status_page(engine);
> -	else
> -		cleanup_status_page(engine);
>   err:
>   	intel_engine_cleanup_common(engine);
>   	return err;
> @@ -1615,11 +1497,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
>   	if (engine->cleanup)
>   		engine->cleanup(engine);
>   
> -	if (HWS_NEEDS_PHYSICAL(dev_priv))
> -		cleanup_phys_status_page(engine);
> -	else
> -		cleanup_status_page(engine);
> -
>   	intel_engine_cleanup_common(engine);
>   
>   	dev_priv->engine[engine->id] = NULL;
> 


More information about the Intel-gfx mailing list