[Intel-gfx] [RFC] drm/i915/lrc: allocate separate page for HWSP
Michel Thierry
michel.thierry at intel.com
Thu Jul 6 18:51:58 UTC 2017
On 7/6/2017 10:59 AM, Chris Wilson wrote:
> If there are no conflicts, then just skip the patch, and really there
> shouldn't be since the writes we care about are ordered and the writes
> we don't, we don't. We will need to move to per-context hwsp in the near
> future which should alleviate these concerns.
It helped me explain where the strange data I was seeing in the HWSP during
driver load came from (that random data wasn't really random, I was
looking at the PPHWSP which later became the HWSP).
Just a comment/fix for GuC submission below, because as it is, this will
break it.
-Michel
On 06/07/17 09:10, Daniele Ceraolo Spurio wrote:
> On gen8+ we're currently using the PPHWSP of the kernel ctx as the
> global HWSP. However, when the kernel ctx gets submitted (e.g. from
> __intel_autoenable_gt_powersave) the HW will use that page as both
> HWSP and PPHWSP. Currently we're not seeing any problem because the
> conflict happens at offsets below 0x30 in an area we don't access,
> but that is not guaranteed to be true for future platform.
>
> To avoid the conflict, instead of re-using the PPHWSP of the kernel
> ctx we can allocate a separate page for the HWSP like what happens for
> pre-execlists platform.
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com> > ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 123 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.c | 42 +----------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 125 +-------------------------------
> 3 files changed, 127 insertions(+), 163 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index a55cd72..70e9d88 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -444,6 +444,114 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
> i915_vma_unpin_and_release(&engine->scratch);
> }
>
> +static void cleanup_phys_status_page(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> +
> + if (!dev_priv->status_page_dmah)
> + return;
> +
> + drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> + engine->status_page.page_addr = NULL;
> +}
> +
> +static void cleanup_status_page(struct intel_engine_cs *engine)
> +{
> + struct i915_vma *vma;
> + struct drm_i915_gem_object *obj;
> +
> + vma = fetch_and_zero(&engine->status_page.vma);
> + if (!vma)
> + return;
> +
> + obj = vma->obj;
> +
> + i915_vma_unpin(vma);
> + i915_vma_close(vma);
> +
> + i915_gem_object_unpin_map(obj);
> + __i915_gem_object_release_unless_active(obj);
> +}
> +
> +static int init_status_page(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_gem_object *obj;
> + struct i915_vma *vma;
> + unsigned int flags;
> + void *vaddr;
> + int ret;
> +
> + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
> + if (IS_ERR(obj)) {
> + DRM_ERROR("Failed to allocate status page\n");
> + return PTR_ERR(obj);
> + }
> +
> + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> + if (ret)
> + goto err;
> +
> + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
> + if (IS_ERR(vma)) {
> + ret = PTR_ERR(vma);
> + goto err;
> + }
> +
> + flags = PIN_GLOBAL;
> + if (!HAS_LLC(engine->i915))
> + /* On g33, we cannot place HWS above 256MiB, so
> + * restrict its pinning to the low mappable arena.
> + * Though this restriction is not documented for
> + * gen4, gen5, or byt, they also behave similarly
> + * and hang if the HWS is placed at the top of the
> + * GTT. To generalise, it appears that all !llc
> + * platforms have issues with us placing the HWS
> + * above the mappable region (even though we never
> + * actualy map it).
Chance to fix a typo ("actualy"), and proof that it's really just
reusing code.
> + */
> + flags |= PIN_MAPPABLE;
> + ret = i915_vma_pin(vma, 0, 4096, flags);
> + if (ret)
> + goto err;
This will break GuC submission. The HWSP will be allocated correctly,
[ ] [drm:intel_engine_init_common [i915]] rcs0 hws offset: 0x00001000
[ ] [drm:intel_engine_init_common [i915]] bcs0 hws offset: 0x00002000
[ ] [drm:intel_engine_init_common [i915]] vcs0 hws offset: 0x00003000
[ ] [drm:intel_engine_init_common [i915]] vcs1 hws offset: 0x00004000
[ ] [drm:intel_engine_init_common [i915]] vecs0 hws offset: 0x00005000
But these are below GUC_WOPCM_TOP (0x80000), and our _beloved_ fw must
be having problems accessing them, because this causes:
[ ] [drm:intel_guc_init_hw [i915]] GuC fw status: fetch SUCCESS, load
PENDING
[ ] [drm:guc_ucode_xfer_dma [i915]] DMA status 0x10, GuC status 0x800071ec
[ ] [drm:guc_ucode_xfer_dma [i915]] returning -110
So we need this restriction before pinning it:
@@ -510,6 +510,11 @@ static int init_status_page(struct intel_engine_cs
*engine)
* actualy map it).
*/
flags |= PIN_MAPPABLE;
+
+ /* GuC can only access pages above GUC_WOPCM_TOP ¯\_(ツ)_/¯ */
+ if (HAS_GUC(engine->i915) && i915.enable_guc_loading)
+ flags |= PIN_OFFSET_BIAS | GUC_WOPCM_TOP;
+
ret = i915_vma_pin(vma, 0, 4096, flags);
if (ret)
goto err;
With that change, GuC is happy:
[ ] [drm:intel_engine_init_common [i915]] rcs0 hws offset: 0x00084000
[ ] [drm:intel_engine_init_common [i915]] bcs0 hws offset: 0x00089000
[ ] [drm:intel_engine_init_common [i915]] vcs0 hws offset: 0x0008e000
[ ] [drm:intel_engine_init_common [i915]] vcs1 hws offset: 0x00093000
[ ] [drm:intel_engine_init_common [i915]] vecs0 hws offset: 0x00098000
...
[ ] [drm:intel_guc_init_hw [i915]] GuC fw status: fetch SUCCESS, load
PENDING
[ ] [drm:guc_ucode_xfer_dma [i915]] DMA status 0x10, GuC status 0x8002f0ec
[ ] [drm:guc_ucode_xfer_dma [i915]] returning 0
[ ] [drm] GuC submission enabled (firmware i915/skl_guc_ver9_33.bin
[version 9.33])
> +
> + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> + if (IS_ERR(vaddr)) {
> + ret = PTR_ERR(vaddr);
> + goto err_unpin;
> + }
> +
> + engine->status_page.vma = vma;
> + engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> + engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> +
> + DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
> + engine->name, i915_ggtt_offset(vma));
> + return 0;
> +
> +err_unpin:
> + i915_vma_unpin(vma);
> +err:
> + i915_gem_object_put(obj);
> + return ret;
> +}
> +
> +static int init_phys_status_page(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> +
> + GEM_BUG_ON(engine->id != RCS);
> +
> + dev_priv->status_page_dmah =
> + drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> + if (!dev_priv->status_page_dmah)
> + return -ENOMEM;
> +
> + engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> + memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> +
> + return 0;
> +}
> +
> /**
> * intel_engines_init_common - initialize cengine state which might require hw access
> * @engine: Engine to initialize.
> @@ -481,8 +589,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
> if (ret)
> goto err_unpin;
>
> + if (HWS_NEEDS_PHYSICAL(engine->i915))
> + ret = init_phys_status_page(engine);
> + else
> + ret = init_status_page(engine);
> + if (ret)
> + goto err_rs_fini;
> +
> return 0;
>
> +err_rs_fini:
> + i915_gem_render_state_fini(engine);
> +
> err_unpin:
> engine->context_unpin(engine, engine->i915->kernel_context);
> return ret;
> @@ -499,6 +617,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
> {
> intel_engine_cleanup_scratch(engine);
>
> + if (HWS_NEEDS_PHYSICAL(engine->i915))
> + cleanup_phys_status_page(engine);
> + else
> + cleanup_status_page(engine);
> +
> i915_gem_render_state_fini(engine);
> intel_engine_fini_breadcrumbs(engine);
> intel_engine_cleanup_cmd_parser(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 699868d..d84a36c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1641,11 +1641,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
> if (engine->cleanup)
> engine->cleanup(engine);
>
> - if (engine->status_page.vma) {
> - i915_gem_object_unpin_map(engine->status_page.vma->obj);
> - engine->status_page.vma = NULL;
> - }
> -
> intel_engine_cleanup_common(engine);
>
> lrc_destroy_wa_ctx(engine);
> @@ -1692,24 +1687,6 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
> engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
> }
>
> -static int
> -lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
> -{
> - const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
> - void *hws;
> -
> - /* The HWSP is part of the default context object in LRC mode. */
> - hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
> - if (IS_ERR(hws))
> - return PTR_ERR(hws);
> -
> - engine->status_page.page_addr = hws + hws_offset;
> - engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
> - engine->status_page.vma = vma;
> -
> - return 0;
> -}
> -
> static void
> logical_ring_setup(struct intel_engine_cs *engine)
> {
> @@ -1742,27 +1719,14 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
> logical_ring_default_irqs(engine);
> }
>
> -static int
> -logical_ring_init(struct intel_engine_cs *engine)
> +static int logical_ring_init(struct intel_engine_cs *engine)
> {
> - struct i915_gem_context *dctx = engine->i915->kernel_context;
> - int ret;
> + int ret = 0;
>
> ret = intel_engine_init_common(engine);
> if (ret)
> - goto error;
> -
> - /* And setup the hardware status page. */
> - ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
> - if (ret) {
> - DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
> - goto error;
> - }
> -
> - return 0;
> + intel_logical_ring_cleanup(engine);
>
> -error:
> - intel_logical_ring_cleanup(engine);
> return ret;
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 5224b7a..b258053 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1174,113 +1174,7 @@ static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
> return 0;
> }
>
> -static void cleanup_phys_status_page(struct intel_engine_cs *engine)
> -{
> - struct drm_i915_private *dev_priv = engine->i915;
> -
> - if (!dev_priv->status_page_dmah)
> - return;
> -
> - drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> - engine->status_page.page_addr = NULL;
> -}
> -
> -static void cleanup_status_page(struct intel_engine_cs *engine)
> -{
> - struct i915_vma *vma;
> - struct drm_i915_gem_object *obj;
> -
> - vma = fetch_and_zero(&engine->status_page.vma);
> - if (!vma)
> - return;
> -
> - obj = vma->obj;
> -
> - i915_vma_unpin(vma);
> - i915_vma_close(vma);
> -
> - i915_gem_object_unpin_map(obj);
> - __i915_gem_object_release_unless_active(obj);
> -}
>
> -static int init_status_page(struct intel_engine_cs *engine)
> -{
> - struct drm_i915_gem_object *obj;
> - struct i915_vma *vma;
> - unsigned int flags;
> - void *vaddr;
> - int ret;
> -
> - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
> - if (IS_ERR(obj)) {
> - DRM_ERROR("Failed to allocate status page\n");
> - return PTR_ERR(obj);
> - }
> -
> - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> - if (ret)
> - goto err;
> -
> - vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
> - if (IS_ERR(vma)) {
> - ret = PTR_ERR(vma);
> - goto err;
> - }
> -
> - flags = PIN_GLOBAL;
> - if (!HAS_LLC(engine->i915))
> - /* On g33, we cannot place HWS above 256MiB, so
> - * restrict its pinning to the low mappable arena.
> - * Though this restriction is not documented for
> - * gen4, gen5, or byt, they also behave similarly
> - * and hang if the HWS is placed at the top of the
> - * GTT. To generalise, it appears that all !llc
> - * platforms have issues with us placing the HWS
> - * above the mappable region (even though we never
> - * actualy map it).
> - */
> - flags |= PIN_MAPPABLE;
> - ret = i915_vma_pin(vma, 0, 4096, flags);
> - if (ret)
> - goto err;
> -
> - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> - if (IS_ERR(vaddr)) {
> - ret = PTR_ERR(vaddr);
> - goto err_unpin;
> - }
> -
> - engine->status_page.vma = vma;
> - engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> - engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> -
> - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
> - engine->name, i915_ggtt_offset(vma));
> - return 0;
> -
> -err_unpin:
> - i915_vma_unpin(vma);
> -err:
> - i915_gem_object_put(obj);
> - return ret;
> -}
> -
> -static int init_phys_status_page(struct intel_engine_cs *engine)
> -{
> - struct drm_i915_private *dev_priv = engine->i915;
> -
> - GEM_BUG_ON(engine->id != RCS);
> -
> - dev_priv->status_page_dmah =
> - drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> - if (!dev_priv->status_page_dmah)
> - return -ENOMEM;
> -
> - engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> - memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> -
> - return 0;
> -}
>
> int intel_ring_pin(struct intel_ring *ring,
> struct drm_i915_private *i915,
> @@ -1567,17 +1461,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
> if (err)
> goto err;
>
> - if (HWS_NEEDS_PHYSICAL(engine->i915))
> - err = init_phys_status_page(engine);
> - else
> - err = init_status_page(engine);
> - if (err)
> - goto err;
> -
> ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
> if (IS_ERR(ring)) {
> err = PTR_ERR(ring);
> - goto err_hws;
> + goto err;
> }
>
> /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
> @@ -1592,11 +1479,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>
> err_ring:
> intel_ring_free(ring);
> -err_hws:
> - if (HWS_NEEDS_PHYSICAL(engine->i915))
> - cleanup_phys_status_page(engine);
> - else
> - cleanup_status_page(engine);
> err:
> intel_engine_cleanup_common(engine);
> return err;
> @@ -1615,11 +1497,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
> if (engine->cleanup)
> engine->cleanup(engine);
>
> - if (HWS_NEEDS_PHYSICAL(dev_priv))
> - cleanup_phys_status_page(engine);
> - else
> - cleanup_status_page(engine);
> -
> intel_engine_cleanup_common(engine);
>
> dev_priv->engine[engine->id] = NULL;
>
More information about the Intel-gfx
mailing list