[Intel-gfx] [PATCH v3 3/5] drm/i915/lrc: allocate separate page for HWSP
Michel Thierry
michel.thierry at intel.com
Thu Jul 13 17:40:47 UTC 2017
On 13/07/17 03:27, Chris Wilson wrote:
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>
> On gen8+ we're currently using the PPHWSP of the kernel ctx as the
> global HWSP. However, when the kernel ctx gets submitted (e.g. from
> __intel_autoenable_gt_powersave) the HW will use that page as both
> HWSP and PPHWSP. This causes a conflict in the register arena of the
> HWSP, i.e. dword indices below 0x30. We don't currently utilize this arena,
> but in the following patches we will take advantage of the cached
> register state for handling execlist's context status interrupt.
>
> To avoid the conflict, instead of re-using the PPHWSP of the kernel
> ctx we can allocate a separate page for the HWSP like what happens for
> pre-execlists platforms.
>
> v2: Add a use-case for the register arena of the HWSP.
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Link: http://patchwork.freedesktop.org/patch/msgid/1499357440-34688-1-git-send-email-daniele.ceraolospurio@intel.com
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Reviewded-by: Michel Thierry <michel.thierry at intel.com>
^ that's why patchwork didn't detect the r-b.
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 126 +++++++++++++++++++++++++++++++-
> drivers/gpu/drm/i915/intel_lrc.c | 34 +--------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 125 +------------------------------
> 3 files changed, 127 insertions(+), 158 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 24db316e0fd1..cba120f3d89d 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -445,6 +445,114 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
> i915_vma_unpin_and_release(&engine->scratch);
> }
>
> +static void cleanup_phys_status_page(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> +
> + if (!dev_priv->status_page_dmah)
> + return;
> +
> + drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> + engine->status_page.page_addr = NULL;
> +}
> +
> +static void cleanup_status_page(struct intel_engine_cs *engine)
> +{
> + struct i915_vma *vma;
> + struct drm_i915_gem_object *obj;
> +
> + vma = fetch_and_zero(&engine->status_page.vma);
> + if (!vma)
> + return;
> +
> + obj = vma->obj;
> +
> + i915_vma_unpin(vma);
> + i915_vma_close(vma);
> +
> + i915_gem_object_unpin_map(obj);
> + __i915_gem_object_release_unless_active(obj);
> +}
> +
> +static int init_status_page(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_gem_object *obj;
> + struct i915_vma *vma;
> + unsigned int flags;
> + void *vaddr;
> + int ret;
> +
> + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
> + if (IS_ERR(obj)) {
> + DRM_ERROR("Failed to allocate status page\n");
> + return PTR_ERR(obj);
> + }
> +
> + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> + if (ret)
> + goto err;
> +
> + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
> + if (IS_ERR(vma)) {
> + ret = PTR_ERR(vma);
> + goto err;
> + }
> +
> + flags = PIN_GLOBAL;
> + if (!HAS_LLC(engine->i915))
> + /* On g33, we cannot place HWS above 256MiB, so
> + * restrict its pinning to the low mappable arena.
> + * Though this restriction is not documented for
> + * gen4, gen5, or byt, they also behave similarly
> + * and hang if the HWS is placed at the top of the
> + * GTT. To generalise, it appears that all !llc
> + * platforms have issues with us placing the HWS
> + * above the mappable region (even though we never
> + * actualy map it).
> + */
> + flags |= PIN_MAPPABLE;
> + ret = i915_vma_pin(vma, 0, 4096, flags);
> + if (ret)
> + goto err;
> +
> + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> + if (IS_ERR(vaddr)) {
> + ret = PTR_ERR(vaddr);
> + goto err_unpin;
> + }
> +
> + engine->status_page.vma = vma;
> + engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> + engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> +
> + DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
> + engine->name, i915_ggtt_offset(vma));
> + return 0;
> +
> +err_unpin:
> + i915_vma_unpin(vma);
> +err:
> + i915_gem_object_put(obj);
> + return ret;
> +}
> +
> +static int init_phys_status_page(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> +
> + GEM_BUG_ON(engine->id != RCS);
> +
> + dev_priv->status_page_dmah =
> + drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> + if (!dev_priv->status_page_dmah)
> + return -ENOMEM;
> +
> + engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> + memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> +
> + return 0;
> +}
> +
> /**
> * intel_engines_init_common - initialize cengine state which might require hw access
> * @engine: Engine to initialize.
> @@ -480,10 +588,21 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>
> ret = i915_gem_render_state_init(engine);
> if (ret)
> - goto err_unpin;
> + goto err_breadcrumbs;
> +
> + if (HWS_NEEDS_PHYSICAL(engine->i915))
> + ret = init_phys_status_page(engine);
> + else
> + ret = init_status_page(engine);
> + if (ret)
> + goto err_rs_fini;
>
> return 0;
>
> +err_rs_fini:
> + i915_gem_render_state_fini(engine);
> +err_breadcrumbs:
> + intel_engine_fini_breadcrumbs(engine);
> err_unpin:
> engine->context_unpin(engine, engine->i915->kernel_context);
> return ret;
> @@ -500,6 +619,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
> {
> intel_engine_cleanup_scratch(engine);
>
> + if (HWS_NEEDS_PHYSICAL(engine->i915))
> + cleanup_phys_status_page(engine);
> + else
> + cleanup_status_page(engine);
> +
> i915_gem_render_state_fini(engine);
> intel_engine_fini_breadcrumbs(engine);
> intel_engine_cleanup_cmd_parser(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index fde145c552ef..3469badedbe0 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1641,11 +1641,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
> if (engine->cleanup)
> engine->cleanup(engine);
>
> - if (engine->status_page.vma) {
> - i915_gem_object_unpin_map(engine->status_page.vma->obj);
> - engine->status_page.vma = NULL;
> - }
> -
> intel_engine_cleanup_common(engine);
>
> lrc_destroy_wa_ctx(engine);
> @@ -1692,24 +1687,6 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
> engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
> }
>
> -static int
> -lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
> -{
> - const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
> - void *hws;
> -
> - /* The HWSP is part of the default context object in LRC mode. */
> - hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
> - if (IS_ERR(hws))
> - return PTR_ERR(hws);
> -
> - engine->status_page.page_addr = hws + hws_offset;
> - engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
> - engine->status_page.vma = vma;
> -
> - return 0;
> -}
> -
> static void
> logical_ring_setup(struct intel_engine_cs *engine)
> {
> @@ -1742,23 +1719,14 @@ logical_ring_setup(struct intel_engine_cs *engine)
> logical_ring_default_irqs(engine);
> }
>
> -static int
> -logical_ring_init(struct intel_engine_cs *engine)
> +static int logical_ring_init(struct intel_engine_cs *engine)
> {
> - struct i915_gem_context *dctx = engine->i915->kernel_context;
> int ret;
>
> ret = intel_engine_init_common(engine);
> if (ret)
> goto error;
>
> - /* And setup the hardware status page. */
> - ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
> - if (ret) {
> - DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
> - goto error;
> - }
> -
> return 0;
>
> error:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 5224b7abb8a3..b2580539051e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1174,113 +1174,7 @@ i915_emit_bb_start(struct drm_i915_gem_request *req,
> return 0;
> }
>
> -static void cleanup_phys_status_page(struct intel_engine_cs *engine)
> -{
> - struct drm_i915_private *dev_priv = engine->i915;
> -
> - if (!dev_priv->status_page_dmah)
> - return;
> -
> - drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> - engine->status_page.page_addr = NULL;
> -}
> -
> -static void cleanup_status_page(struct intel_engine_cs *engine)
> -{
> - struct i915_vma *vma;
> - struct drm_i915_gem_object *obj;
> -
> - vma = fetch_and_zero(&engine->status_page.vma);
> - if (!vma)
> - return;
> -
> - obj = vma->obj;
> -
> - i915_vma_unpin(vma);
> - i915_vma_close(vma);
> -
> - i915_gem_object_unpin_map(obj);
> - __i915_gem_object_release_unless_active(obj);
> -}
>
> -static int init_status_page(struct intel_engine_cs *engine)
> -{
> - struct drm_i915_gem_object *obj;
> - struct i915_vma *vma;
> - unsigned int flags;
> - void *vaddr;
> - int ret;
> -
> - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
> - if (IS_ERR(obj)) {
> - DRM_ERROR("Failed to allocate status page\n");
> - return PTR_ERR(obj);
> - }
> -
> - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> - if (ret)
> - goto err;
> -
> - vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
> - if (IS_ERR(vma)) {
> - ret = PTR_ERR(vma);
> - goto err;
> - }
> -
> - flags = PIN_GLOBAL;
> - if (!HAS_LLC(engine->i915))
> - /* On g33, we cannot place HWS above 256MiB, so
> - * restrict its pinning to the low mappable arena.
> - * Though this restriction is not documented for
> - * gen4, gen5, or byt, they also behave similarly
> - * and hang if the HWS is placed at the top of the
> - * GTT. To generalise, it appears that all !llc
> - * platforms have issues with us placing the HWS
> - * above the mappable region (even though we never
> - * actualy map it).
> - */
> - flags |= PIN_MAPPABLE;
> - ret = i915_vma_pin(vma, 0, 4096, flags);
> - if (ret)
> - goto err;
> -
> - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> - if (IS_ERR(vaddr)) {
> - ret = PTR_ERR(vaddr);
> - goto err_unpin;
> - }
> -
> - engine->status_page.vma = vma;
> - engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> - engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> -
> - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
> - engine->name, i915_ggtt_offset(vma));
> - return 0;
> -
> -err_unpin:
> - i915_vma_unpin(vma);
> -err:
> - i915_gem_object_put(obj);
> - return ret;
> -}
> -
> -static int init_phys_status_page(struct intel_engine_cs *engine)
> -{
> - struct drm_i915_private *dev_priv = engine->i915;
> -
> - GEM_BUG_ON(engine->id != RCS);
> -
> - dev_priv->status_page_dmah =
> - drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> - if (!dev_priv->status_page_dmah)
> - return -ENOMEM;
> -
> - engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> - memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> -
> - return 0;
> -}
>
> int intel_ring_pin(struct intel_ring *ring,
> struct drm_i915_private *i915,
> @@ -1567,17 +1461,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
> if (err)
> goto err;
>
> - if (HWS_NEEDS_PHYSICAL(engine->i915))
> - err = init_phys_status_page(engine);
> - else
> - err = init_status_page(engine);
> - if (err)
> - goto err;
> -
> ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
> if (IS_ERR(ring)) {
> err = PTR_ERR(ring);
> - goto err_hws;
> + goto err;
> }
>
> /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
> @@ -1592,11 +1479,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>
> err_ring:
> intel_ring_free(ring);
> -err_hws:
> - if (HWS_NEEDS_PHYSICAL(engine->i915))
> - cleanup_phys_status_page(engine);
> - else
> - cleanup_status_page(engine);
> err:
> intel_engine_cleanup_common(engine);
> return err;
> @@ -1615,11 +1497,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
> if (engine->cleanup)
> engine->cleanup(engine);
>
> - if (HWS_NEEDS_PHYSICAL(dev_priv))
> - cleanup_phys_status_page(engine);
> - else
> - cleanup_status_page(engine);
> -
> intel_engine_cleanup_common(engine);
>
> dev_priv->engine[engine->id] = NULL;
>
More information about the Intel-gfx
mailing list