[Intel-gfx] [PATCH 39/46] drm/i915: Always allocate an object/vma for the HWSP

Thu Jan 10 10:52:48 UTC 2019

On Mon, 7 Jan 2019 at 11:55, Chris Wilson <chris at chris-wilson.co.uk> wrote:
>
> Currently we only allocate an object and vma if we are using a GGTT
> virtual HWSP, and a plain struct page for a physical HWSP. For
> convenience later on with global timelines, it will be useful to always
> have the status page being tracked by a struct i915_vma. Make it so.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c       | 109 ++++++++++---------
>  drivers/gpu/drm/i915/intel_guc_submission.c  |   5 +
>  drivers/gpu/drm/i915/intel_lrc.c             |  11 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c      |  20 +++-
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |  23 +---
>  drivers/gpu/drm/i915/selftests/mock_engine.c |   2 +-
>  6 files changed, 90 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 1a9de4a01b9d..ffef7f43fda3 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
>
>  static void cleanup_status_page(struct intel_engine_cs *engine)
>  {
> +       struct i915_vma *vma;
> +
>         /* Prevent writes into HWSP after returning the page to the system */
>         intel_engine_set_hwsp_writemask(engine, ~0u);
>
> -       if (HWS_NEEDS_PHYSICAL(engine->i915)) {
> -               void *addr = fetch_and_zero(&engine->status_page.page_addr);
> +       vma = fetch_and_zero(&engine->status_page.vma);
> +       if (!vma)
> +               return;
>
> -               __free_page(virt_to_page(addr));
> -       }
> +       if (!HWS_NEEDS_PHYSICAL(engine->i915))
> +               i915_vma_unpin(vma);
> +
> +       i915_gem_object_unpin_map(vma->obj);
> +       __i915_gem_object_release_unless_active(vma->obj);
> +}
> +
> +static int pin_ggtt_status_page(struct intel_engine_cs *engine,
> +                               struct i915_vma *vma)
> +{
> +       unsigned int flags;
> +
> +       flags = PIN_GLOBAL;
> +       if (!HAS_LLC(engine->i915))
> +               /*
> +                * On g33, we cannot place HWS above 256MiB, so
> +                * restrict its pinning to the low mappable arena.
> +                * Though this restriction is not documented for
> +                * gen4, gen5, or byt, they also behave similarly
> +                * and hang if the HWS is placed at the top of the
> +                * GTT. To generalise, it appears that all !llc
> +                * platforms have issues with us placing the HWS
> +                * above the mappable region (even though we never
> +                * actually map it).
> +                */
> +               flags |= PIN_MAPPABLE;
> +       else
> +               flags |= PIN_HIGH;
>
> -       i915_vma_unpin_and_release(&engine->status_page.vma,
> -                                  I915_VMA_RELEASE_MAP);
> +       return i915_vma_pin(vma, 0, 0, flags);
>  }
>
>  static int init_status_page(struct intel_engine_cs *engine)
>  {
>         struct drm_i915_gem_object *obj;
>         struct i915_vma *vma;
> -       unsigned int flags;
>         void *vaddr;
>         int ret;
>
> +       /*
> +        * Though the HWS register does support 36bit addresses, historically
> +        * we have had hangs and corruption reported due to wild writes if
> +        * the HWS is placed above 4G. We only allow objects to be allocated
> +        * in GFP_DMA32 for i965, and no earlier physical address users had
> +        * access to more than 4G.
> +        */
>         obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
>         if (IS_ERR(obj)) {
>                 DRM_ERROR("Failed to allocate status page\n");
> @@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
>                 goto err;
>         }
>
> -       flags = PIN_GLOBAL;
> -       if (!HAS_LLC(engine->i915))
> -               /* On g33, we cannot place HWS above 256MiB, so
> -                * restrict its pinning to the low mappable arena.
> -                * Though this restriction is not documented for
> -                * gen4, gen5, or byt, they also behave similarly
> -                * and hang if the HWS is placed at the top of the
> -                * GTT. To generalise, it appears that all !llc
> -                * platforms have issues with us placing the HWS
> -                * above the mappable region (even though we never
> -                * actually map it).
> -                */
> -               flags |= PIN_MAPPABLE;
> -       else
> -               flags |= PIN_HIGH;
> -       ret = i915_vma_pin(vma, 0, 0, flags);
> -       if (ret)
> -               goto err;
> -
>         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
>         if (IS_ERR(vaddr)) {
>                 ret = PTR_ERR(vaddr);
> -               goto err_unpin;
> +               goto err;
>         }
>
> +       engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
>         engine->status_page.vma = vma;
> -       engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> -       engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
> +
> +       if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
> +               ret = pin_ggtt_status_page(engine, vma);
> +               if (ret)
> +                       goto err_unpin;
> +       }

Don't we now need special casing in gem_record_rings, since the error
capture will now try to iterate over vma->pages for the status page?