[Intel-gfx] [PATCH v3] drm/i915: Use a cached mapping for the physical HWS

Wed May 31 09:35:23 UTC 2017

On Mon, May 22, 2017 at 12:55:14PM +0100, Chris Wilson wrote:
> Older gen use a physical address for the hardware status page, for which
> we use cache-coherent writes. As the writes are into the cpu cache, we use
> a normal WB mapped page to read the HWS, used for our seqno tracking.
> 
> Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
> which so far have not reoccurred with this patch. How reliable that
> evidence is remains to be seen.
> 
> v2: Explicitly pass the expected physical address to the hw
> v3: Also remember the wild writes we once had for HWS above 4G.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>

I'm still not sold on the story, but as a cleanup it seems a ok idea.
Getting rid of the drm dma api wrappers is definitely good, since
drm-the-OS-abstraction is dead.

Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>

> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  1 -
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 29 +++++++++++++++--------------
>  2 files changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 90787e0084f2..1a7961ef4399 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2109,7 +2109,6 @@ struct drm_i915_private {
>  	struct i915_gem_context *kernel_context;
>  	struct intel_engine_cs *engine[I915_NUM_ENGINES];
>  
> -	struct drm_dma_handle *status_page_dmah;
>  	struct resource mch_res;
>  
>  	/* protects the irq masks */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c32a4ba9579f..26808f2bf748 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -340,11 +340,14 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
>  static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
> +	struct page *page = virt_to_page(engine->status_page.page_addr);
> +	phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
>  	u32 addr;
>  
> -	addr = dev_priv->status_page_dmah->busaddr;
> +	addr = lower_32_bits(phys);
>  	if (INTEL_GEN(dev_priv) >= 4)
> -		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
> +		addr |= (phys >> 28) & 0xf0;
> +
>  	I915_WRITE(HWS_PGA, addr);
>  }
>  
> @@ -1000,12 +1003,10 @@ i915_emit_bb_start(struct drm_i915_gem_request *req,
>  
>  static void cleanup_phys_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	if (!dev_priv->status_page_dmah)
> +	if (!engine->status_page.page_addr)
>  		return;
>  
> -	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> +	__free_page(virt_to_page(engine->status_page.page_addr));
>  	engine->status_page.page_addr = NULL;
>  }
>  
> @@ -1091,17 +1092,17 @@ static int init_status_page(struct intel_engine_cs *engine)
>  
>  static int init_phys_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	GEM_BUG_ON(engine->id != RCS);
> +	struct page *page;
>  
> -	dev_priv->status_page_dmah =
> -		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> -	if (!dev_priv->status_page_dmah)
> +	/* Though the HWS register does support 36bit addresses, historically
> +	 * we have had hangs and corruption reported due to wild writes if
> +	 * the HWS is placed above 4G.
> +	 */
> +	page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
> +	if (!page)
>  		return -ENOMEM;
>  
> -	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> -	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> +	engine->status_page.page_addr = page_address(page);
>  
>  	return 0;
>  }
> -- 
> 2.11.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch