[Intel-xe] [PATCH 9/9] fixup! drm/xe/display: Implement display support
Matt Roper
matthew.d.roper at intel.com
Tue Sep 26 21:04:48 UTC 2023
On Tue, Sep 26, 2023 at 03:48:56PM -0500, Lucas De Marchi wrote:
> On Mon, Sep 25, 2023 at 04:42:04PM -0700, Matt Roper wrote:
> > On Mon, Sep 25, 2023 at 03:10:49PM -0700, Lucas De Marchi wrote:
> > > Fix the build break caused by the previous commit. Squash into
> > > "Implement display support", which is to be moved above the previous
> > > commit.
> >
> > I know Matt Auld's series is going to come through and replace all the
> > hardcoded cache settings with the proper PAT index for whatever object
> > we're operating on, but for the time being should we at least switch the
> > hardcoding in this patch to be CACHE_WT rather than CACHE_WB? Since
> > display is never coherent (regardless of the platform's CPU<->GT
> > coherency behavior), WB would pretty much always be the wrong thing for
> > surfaces that we're mapping into a DPT.
>
> Makes sense, but this means we'd be changing the behavior for every
> platform with display. MTL was using a hack to pick index 3, and all
> the others were (implicitly) using index 0, which means WB on every
> platform with display.
Yeah, good point. And now that I look at this again with fresh eyes, I
see that I mixed up CPU:WT with GPU:WT. It's the CPU-side mapping of
scanout buffers that needs to be WT (or WC/UC, although those would give
lower performance), not the GPU-side mapping in the PTEs. I think
GPU:WB should still be fine, since we'll be issuing a PIPE_CONTROL /
MI_FLUSH_DW after generating the content and before it gets flipped to a
display plane.
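
To make that distinction concrete, here's a rough sketch of the two
independent attributes that keep getting conflated. The struct and
field names below are made up purely for illustration; only pgprot_t
and enum xe_cache_level come from real code:

	/*
	 * Illustration only: a scanout buffer involves two separate
	 * caching decisions.  The GPU-side PTE can stay WB because we
	 * flush (PIPE_CONTROL / MI_FLUSH_DW) before the flip; the
	 * CPU-side mapping must avoid WB because the display engine
	 * never snoops the CPU caches.
	 */
	struct scanout_caching {
		enum xe_cache_level gpu_cache;	/* XE_CACHE_WB is fine here */
		pgprot_t cpu_pgprot;		/* WT, or e.g. pgprot_writecombine() /
						 * pgprot_noncached(), never plain cached */
	};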
So disregard my earlier comments.
Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
This isn't the first time there have been mix-ups over whether the
WB/WT/WC/UC terms in the code refer to the CPU or the GPU side. I know
Matt Auld's series adds some "cpu" terminology to a few of the
fields/defines to make the code more obvious; maybe these XE_CACHE_*
macros should pick up a "_GPU_" in their names at some point too.
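E.g. something like this (purely hypothetical, not part of any posted
series, just to show the idea):

	/* Hypothetical rename to make the GPU scope explicit */
	enum xe_gpu_cache_level {
		XE_GPU_CACHE_NONE,
		XE_GPU_CACHE_WT,
		XE_GPU_CACHE_WB,
	};

Then call sites like pte_encode_bo() would read XE_GPU_CACHE_WB, and
the GPU-vs-CPU scope would be obvious right where the value is used.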
Matt
>
> Lucas De Marchi
>
> >
> >
> > Matt
> >
> > >
> > > Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> > > ---
> > > drivers/gpu/drm/xe/display/xe_fb_pin.c | 32 ++++++++++++++++++--------
> > > 1 file changed, 23 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > > index 3422942a9951..b7a04fba3585 100644
> > > --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > > +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > > @@ -17,7 +17,10 @@ static void
> > > write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs,
> > > u32 width, u32 height, u32 src_stride, u32 dst_stride)
> > > {
> > > + struct xe_device *xe = xe_bo_device(bo);
> > > + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
> > > u32 column, row;
> > > +
> > > /* TODO: Maybe rewrite so we can traverse the bo addresses sequentially,
> > > * by writing dpt/ggtt in a different order?
> > > */
> > > @@ -26,8 +29,10 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_
> > > u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
> > >
> > > for (row = 0; row < height; row++) {
> > > - iosys_map_wr(map, *dpt_ofs, u64,
> > > - xe_ggtt_pte_encode(bo, src_idx * XE_PAGE_SIZE));
> > > + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
> > > + XE_CACHE_WB);
> > > +
> > > + iosys_map_wr(map, *dpt_ofs, u64, pte);
> > > *dpt_ofs += 8;
> > > src_idx -= src_stride;
> > > }
> > > @@ -46,6 +51,7 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
> > > {
> > > struct xe_device *xe = to_xe_device(fb->base.dev);
> > > struct xe_tile *tile0 = xe_device_get_root_tile(xe);
> > > + struct xe_ggtt *ggtt = tile0->mem.ggtt;
> > > struct xe_bo *bo = intel_fb_obj(&fb->base), *dpt;
> > > u32 dpt_size, size = bo->ttm.base.size;
> > >
> > > @@ -76,9 +82,12 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
> > > if (view->type == I915_GTT_VIEW_NORMAL) {
> > > u32 x;
> > >
> > > - for (x = 0; x < size / XE_PAGE_SIZE; x++)
> > > - iosys_map_wr(&dpt->vmap, x * 8, u64,
> > > - xe_ggtt_pte_encode(bo, x * XE_PAGE_SIZE));
> > > + for (x = 0; x < size / XE_PAGE_SIZE; x++) {
> > > + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE,
> > > + XE_CACHE_WB);
> > > +
> > > + iosys_map_wr(&dpt->vmap, x * 8, u64, pte);
> > > + }
> > > } else {
> > > const struct intel_rotation_info *rot_info = &view->rotated;
> > > u32 i, dpt_ofs = 0;
> > > @@ -107,8 +116,10 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo
> > > u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
> > >
> > > for (row = 0; row < height; row++) {
> > > - xe_ggtt_set_pte(ggtt, *ggtt_ofs,
> > > - xe_ggtt_pte_encode(bo, src_idx * XE_PAGE_SIZE));
> > > + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
> > > + XE_CACHE_WB);
> > > +
> > > + xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte);
> > > *ggtt_ofs += XE_PAGE_SIZE;
> > > src_idx -= src_stride;
> > > }
> > > @@ -150,8 +161,11 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
> > > if (ret)
> > > goto out_unlock;
> > >
> > > - for (x = 0; x < size; x += XE_PAGE_SIZE)
> > > - xe_ggtt_set_pte(ggtt, vma->node.start + x, xe_ggtt_pte_encode(bo, x));
> > > + for (x = 0; x < size; x += XE_PAGE_SIZE) {
> > > + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, XE_CACHE_WB);
> > > +
> > > + xe_ggtt_set_pte(ggtt, vma->node.start + x, pte);
> > > + }
> > > } else {
> > > u32 i, ggtt_ofs;
> > > const struct intel_rotation_info *rot_info = &view->rotated;
> > > --
> > > 2.40.1
> > >
> >
> > --
> > Matt Roper
> > Graphics Software Engineer
> > Linux GPU Platform Enablement
> > Intel Corporation
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation