[PATCH 3/4] drm/xe: Do not perform GuC TLB invalidation for display GGTT

Matthew Brost matthew.brost at intel.com
Tue Mar 5 17:09:40 UTC 2024


On Tue, Mar 05, 2024 at 09:46:47AM -0500, Rodrigo Vivi wrote:
> On Tue, Mar 05, 2024 at 02:12:49PM +0100, Maarten Lankhorst wrote:
> > There should be no need to invalidate GuC for display, and it adds
> > a lot of latency for pinning/unpinning.
> 
> why 'display' is the special condition that doesn't need invalidation?
> isn't there anything else we can use to differentiate the issues?
> aren't we over invalidating then?
> 
> > 
> > Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> > ---
> >  drivers/gpu/drm/xe/display/xe_fb_pin.c        | 11 ++++++++---
> >  drivers/gpu/drm/xe/display/xe_plane_initial.c |  3 ++-
> >  drivers/gpu/drm/xe/xe_bo.h                    |  2 ++
> >  drivers/gpu/drm/xe/xe_ggtt.c                  | 16 ++++++++++------
> >  drivers/gpu/drm/xe/xe_ggtt.h                  |  4 ++--
> >  5 files changed, 24 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > index 722c84a56607..95595d2cf1dc 100644
> > --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > @@ -100,16 +100,20 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
> >  		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
> >  					   ttm_bo_type_kernel,
> >  					   XE_BO_CREATE_VRAM0_BIT |
> > +					   XE_BO_CREATE_GGTT_BIT |
> > +					   XE_BO_DISPLAY_GGTT_BIT |
> >  					   XE_BO_CREATE_GGTT_BIT);
> >  	else
> >  		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
> >  					   ttm_bo_type_kernel,
> >  					   XE_BO_CREATE_STOLEN_BIT |
> > +					   XE_BO_DISPLAY_GGTT_BIT |
> >  					   XE_BO_CREATE_GGTT_BIT);
> >  	if (IS_ERR(dpt))
> >  		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
> >  					   ttm_bo_type_kernel,
> >  					   XE_BO_CREATE_SYSTEM_BIT |
> > +					   XE_BO_DISPLAY_GGTT_BIT |
> >  					   XE_BO_CREATE_GGTT_BIT);
> >  	if (IS_ERR(dpt))
> >  		return PTR_ERR(dpt);
> > @@ -201,6 +205,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
> >  
> >  	if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) {
> >  		vma->node = bo->ggtt_node;
> > +		goto out_unlock;
> >  	} else if (view->type == I915_GTT_VIEW_NORMAL) {
> >  		u32 x, size = bo->ttm.base.size;
> >  
> > @@ -238,7 +243,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
> >  					   rot_info->plane[i].dst_stride);
> >  	}
> >  
> > -	xe_ggtt_invalidate(ggtt);
> > +	xe_ggtt_invalidate(ggtt, true);
> 
> this is a bogus call... isn't it better to simply remove this line?
> and perhaps not even adding the display condition inside the function
> but checking on the outside?
> 

Agree with Rodrigo here, rather than adding an argument to
xe_ggtt_invalidate properly call or not call xe_ggtt_invalidate.

So here just delete xe_ggtt_invalidate.

> >  out_unlock:
> >  	mutex_unlock(&ggtt->lock);
> >  out:
> > @@ -321,7 +326,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma)
> >  		xe_bo_unpin_map_no_vm(vma->dpt);
> >  	else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
> >  		 vma->bo->ggtt_node.start != vma->node.start)
> > -		xe_ggtt_remove_node(ggtt, &vma->node);
> > +		xe_ggtt_remove_node(ggtt, &vma->node, true);
> >  
> >  	ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
> >  	ttm_bo_unpin(&vma->bo->ttm);
> > @@ -381,4 +386,4 @@ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
> >  void intel_dpt_destroy(struct i915_address_space *vm)
> >  {
> >  	return;
> > -}
> > \ No newline at end of file
> > +}
> > diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
> > index 866d1dd6eeb4..79766b561d16 100644
> > --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
> > +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
> > @@ -62,7 +62,8 @@ initial_plane_bo(struct xe_device *xe,
> >  	if (plane_config->size == 0)
> >  		return NULL;
> >  
> > -	flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT;
> > +	flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT |
> > +		XE_BO_CREATE_GGTT_BIT | XE_BO_DISPLAY_GGTT_BIT;
> >  
> >  	base = round_down(plane_config->base, page_size);
> >  	if (IS_DGFX(xe)) {
> > diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> > index c59ad15961ce..1290c17c10f9 100644
> > --- a/drivers/gpu/drm/xe/xe_bo.h
> > +++ b/drivers/gpu/drm/xe/xe_bo.h
> > @@ -45,6 +45,8 @@
> >  #define XE_BO_PAGETABLE			BIT(12)
> >  #define XE_BO_NEEDS_CPU_ACCESS		BIT(13)
> >  #define XE_BO_NEEDS_UC			BIT(14)
> > +#define XE_BO_DISPLAY_GGTT_BIT		BIT(15)
> > +
> >  /* this one is trigger internally only */
> >  #define XE_BO_INTERNAL_TEST		BIT(30)
> >  #define XE_BO_INTERNAL_64K		BIT(31)
> > diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
> > index 325337c38961..bda5055a2a18 100644
> > --- a/drivers/gpu/drm/xe/xe_ggtt.c
> > +++ b/drivers/gpu/drm/xe/xe_ggtt.c
> > @@ -211,7 +211,7 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
> >  	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
> >  		xe_ggtt_clear(ggtt, start, end - start);
> >  
> > -	xe_ggtt_invalidate(ggtt);
> > +	xe_ggtt_invalidate(ggtt, false);
> >  	mutex_unlock(&ggtt->lock);
> >  	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
> >  }
> > @@ -261,8 +261,12 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
> >  		drm_warn(&gt_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err);
> >  }
> >  
> > -void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
> > +void xe_ggtt_invalidate(struct xe_ggtt *ggtt, bool display)
> >  {
> > +	/* Nothing to invalidate for display */
> > +	if (display)
> > +		return;
> > +
> >  	/* Each GT in a tile has its own TLB to cache GGTT lookups */
> >  	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
> >  	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
> > @@ -388,7 +392,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
> >  		xe_ggtt_set_pte(ggtt, start + offset, pte);
> >  	}
> >  
> > -	xe_ggtt_invalidate(ggtt);
> > +	xe_ggtt_invalidate(ggtt, bo->flags & XE_BO_DISPLAY_GGTT_BIT);

Here would be:

if (!(bo->flags & XE_BO_DISPLAY_GGTT_BIT))
	xe_ggtt_invalidate()

Get the idea?

Matt

> >  }
> >  
> >  static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
> > @@ -433,7 +437,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
> >  	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
> >  }
> >  
> > -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
> > +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, bool display)
> >  {
> >  	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
> >  	mutex_lock(&ggtt->lock);
> > @@ -442,7 +446,7 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
> >  	drm_mm_remove_node(node);
> >  	node->size = 0;
> >  
> > -	xe_ggtt_invalidate(ggtt);
> > +	xe_ggtt_invalidate(ggtt, display);
> >  
> >  	mutex_unlock(&ggtt->lock);
> >  	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
> > @@ -456,7 +460,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
> >  	/* This BO is not currently in the GGTT */
> >  	xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
> >  
> > -	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
> > +	xe_ggtt_remove_node(ggtt, &bo->ggtt_node, bo->flags & XE_BO_DISPLAY_GGTT_BIT);
> >  }
> >  
> >  int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
> > diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
> > index 42705e1338e1..bc2a6379a2e9 100644
> > --- a/drivers/gpu/drm/xe/xe_ggtt.h
> > +++ b/drivers/gpu/drm/xe/xe_ggtt.h
> > @@ -11,7 +11,7 @@
> >  struct drm_printer;
> >  
> >  void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
> > -void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
> > +void xe_ggtt_invalidate(struct xe_ggtt *ggtt, bool display);
> >  int xe_ggtt_init_early(struct xe_ggtt *ggtt);
> >  int xe_ggtt_init(struct xe_ggtt *ggtt);
> >  void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
> > @@ -24,7 +24,7 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
> >  int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
> >  				       struct drm_mm_node *node,
> >  				       u32 size, u32 align, u32 mm_flags);
> > -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
> > +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, bool display);
> >  void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
> >  int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
> >  int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
> > -- 
> > 2.43.0
> > 


More information about the Intel-xe mailing list