[Intel-gfx] [PATCH 5/5] drm/i915/adlp: Add support for remapping CCS FBs

Imre Deak imre.deak at intel.com
Sun Sep 5 10:55:59 UTC 2021


On Sat, Sep 04, 2021 at 02:54:31PM +0300, Juha-Pekka Heikkila wrote:
> Hi Imre,
> 
> other than that one question for this patch and one missing static
> declaration what kernel test bot said about gen12_ccs_aux_stride(..) changes
> look ok to me.
> 
> with those checked this series is
> 
> Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>

Thanks.

> On 27.8.2021 18.09, Imre Deak wrote:
> > Add support for remapping CCS FBs on ADL-P to remove the restriction
> > of the power-of-two sized stride and the 2MB surface offset alignment
> > for these FBs.
> > 
> > We can only remap the tiles on the main surface, not the tiles on the
> > CCS surface, so userspace has to generate the CCS surface aligning to
> > the POT size padded main surface stride (by programming the AUX
> > pagetable accordingly). For the required AUX pagetable setup, this
> > requires that either the main surface stride is 8 tiles or that the
> > stride is 16 tiles aligned (= 64 kbytes, the area mapped by one AUX
> > PTE).
> > 
> > Signed-off-by: Imre Deak <imre.deak at intel.com>
> > ---
> >   drivers/gpu/drm/i915/display/intel_display.c  |  5 +-
> >   .../drm/i915/display/intel_display_types.h    |  2 -
> >   drivers/gpu/drm/i915/display/intel_fb.c       | 78 +++++++++++--------
> >   drivers/gpu/drm/i915/gt/intel_ggtt.c          | 28 ++++++-
> >   drivers/gpu/drm/i915/i915_vma_types.h         |  7 +-
> >   5 files changed, 79 insertions(+), 41 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> > index 4d35f3b087d7e..3e64fda072789 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display.c
> > +++ b/drivers/gpu/drm/i915/display/intel_display.c
> > @@ -892,8 +892,11 @@ unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info
> >   	unsigned int size = 0;
> >   	int i;
> > -	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
> > +	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
> > +		if (rem_info->plane_alignment)
> > +			size = ALIGN(size, rem_info->plane_alignment);
> >   		size += rem_info->plane[i].dst_stride * rem_info->plane[i].height;
> 
> Would above size alignment need to happen after size has been added?

Only the offset of each plane within the view needs to be aligned, but
the total size of the view doesn't need to be. By that we can avoid
having to map any padding pages at the end of the view.

> > +	}
> >   	return size;
> >   }
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
> > index c7bcf9183447b..e97741c2f4e32 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> > +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> > @@ -103,8 +103,6 @@ struct intel_fb_view {
> >   	 * in the rotated and remapped GTT view all no-CCS formats (up to 2
> >   	 * color planes) are supported.
> >   	 *
> > -	 * TODO: add support for CCS formats in the remapped GTT view.
> > -	 *
> >   	 * The view information shared by all FB color planes in the FB,
> >   	 * like dst x/y and src/dst width, is stored separately in
> >   	 * intel_plane_state.
> > diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
> > index 83262cb347196..bcca78c84290b 100644
> > --- a/drivers/gpu/drm/i915/display/intel_fb.c
> > +++ b/drivers/gpu/drm/i915/display/intel_fb.c
> > @@ -356,15 +356,29 @@ void intel_fb_plane_get_subsampling(int *hsub, int *vsub,
> >   static void intel_fb_plane_dims(const struct intel_framebuffer *fb, int color_plane, int *w, int *h)
> >   {
> > +	struct drm_i915_private *i915 = to_i915(fb->base.dev);
> >   	int main_plane = is_ccs_plane(&fb->base, color_plane) ?
> >   			 skl_ccs_to_main_plane(&fb->base, color_plane) : 0;
> > +	unsigned int main_width = fb->base.width;
> > +	unsigned int main_height = fb->base.height;
> >   	int main_hsub, main_vsub;
> >   	int hsub, vsub;
> > +	/*
> > +	 * On ADL-P the CCS AUX surface layout always aligns with the
> > +	 * power-of-two aligned main surface stride. The main surface
> > +	 * stride in the allocated FB object may not be power-of-two
> > +	 * sized, in which case it is auto-padded to the POT size.
> > +	 */
> > +	if (IS_ALDERLAKE_P(i915) && is_ccs_plane(&fb->base, color_plane))
> > +		main_width = gen12_aligned_scanout_stride(fb, 0) /
> > +			     fb->base.format->cpp[0];
> > +
> >   	intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, &fb->base, main_plane);
> >   	intel_fb_plane_get_subsampling(&hsub, &vsub, &fb->base, color_plane);
> > -	*w = fb->base.width / main_hsub / hsub;
> > -	*h = fb->base.height / main_vsub / vsub;
> > +
> > +	*w = main_width / main_hsub / hsub;
> > +	*h = main_height / main_vsub / vsub;
> >   }
> >   static u32 intel_adjust_tile_offset(int *x, int *y,
> > @@ -546,16 +560,7 @@ static int intel_fb_offset_to_xy(int *x, int *y,
> >   	unsigned int height;
> >   	u32 alignment;
> > -	/*
> > -	 * All DPT color planes must be 512*4k aligned (the amount mapped by a
> > -	 * single DPT page). For ADL_P CCS FBs this only works by requiring
> > -	 * the allocated offsets to be 2MB aligned.  Once supoort to remap
> > -	 * such FBs is added we can remove this requirement, as then all the
> > -	 * planes can be remapped to an aligned offset.
> > -	 */
> > -	if (IS_ALDERLAKE_P(i915) && is_ccs_modifier(fb->modifier))
> > -		alignment = 512 * 4096;
> > -	else if (DISPLAY_VER(i915) >= 12 &&
> > +	if (DISPLAY_VER(i915) >= 12 &&
> >   		 is_semiplanar_uv_plane(fb, color_plane))
> >   		alignment = intel_tile_row_size(fb, color_plane);
> >   	else if (fb->modifier != DRM_FORMAT_MOD_LINEAR)
> > @@ -687,8 +692,7 @@ bool intel_fb_needs_pot_stride_remap(const struct intel_framebuffer *fb)
> >   {
> >   	struct drm_i915_private *i915 = to_i915(fb->base.dev);
> > -	return IS_ALDERLAKE_P(i915) && fb->base.modifier != DRM_FORMAT_MOD_LINEAR &&
> > -	       !is_ccs_modifier(fb->base.modifier);
> > +	return IS_ALDERLAKE_P(i915) && fb->base.modifier != DRM_FORMAT_MOD_LINEAR;
> >   }
> >   static int intel_fb_pitch(const struct intel_framebuffer *fb, int color_plane, unsigned int rotation)
> > @@ -808,14 +812,16 @@ static unsigned int
> >   plane_view_dst_stride_tiles(const struct intel_framebuffer *fb, int color_plane,
> >   			    unsigned int pitch_tiles)
> >   {
> > -	if (intel_fb_needs_pot_stride_remap(fb))
> > +	if (intel_fb_needs_pot_stride_remap(fb)) {
> > +		unsigned int min_stride = is_ccs_plane(&fb->base, color_plane) ? 2 : 8;
> >   		/*
> >   		 * ADL_P, the only platform needing a POT stride has a minimum
> > -		 * of 8 stride tiles.
> > +		 * of 8 main surface and 2 CCS AUX stride tiles.
> >   		 */
> > -		return roundup_pow_of_two(max(pitch_tiles, 8u));
> > -	else
> > +		return roundup_pow_of_two(max(pitch_tiles, min_stride));
> > +	} else {
> >   		return pitch_tiles;
> > +	}
> >   }
> >   static unsigned int
> > @@ -851,13 +857,22 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p
> >   	unsigned int tile_height = dims->tile_height;
> >   	unsigned int tile_size = intel_tile_size(i915);
> >   	struct drm_rect r;
> > -	u32 size;
> > +	u32 size = 0;
> >   	assign_chk_ovf(i915, remap_info->offset, obj_offset);
> >   	assign_chk_ovf(i915, remap_info->src_stride, plane_view_src_stride_tiles(fb, color_plane, dims));
> >   	assign_chk_ovf(i915, remap_info->width, plane_view_width_tiles(fb, color_plane, dims, x));
> >   	assign_chk_ovf(i915, remap_info->height, plane_view_height_tiles(fb, color_plane, dims, y));
> > +	if (IS_ALDERLAKE_P(i915)) {
> > +		unsigned int alignment = SZ_2M / PAGE_SIZE;
> > +		unsigned int aligned_offset = ALIGN(gtt_offset, alignment);
> > +
> > +		view->gtt.remapped.plane_alignment = alignment;
> > +		size += aligned_offset - gtt_offset;
> > +		gtt_offset = aligned_offset;
> > +	}
> > +
> >   	if (view->gtt.type == I915_GGTT_VIEW_ROTATED) {
> >   		check_array_bounds(i915, view->gtt.rotated.plane, color_plane);
> > @@ -876,7 +891,7 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p
> >   		color_plane_info->stride = remap_info->dst_stride * tile_height;
> > -		size = remap_info->dst_stride * remap_info->width;
> > +		size += remap_info->dst_stride * remap_info->width;
> >   		/* rotate the tile dimensions to match the GTT view */
> >   		swap(tile_width, tile_height);
> > @@ -894,7 +909,7 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p
> >   		color_plane_info->stride = remap_info->dst_stride * tile_width *
> >   					   fb->base.format->cpp[color_plane];
> > -		size = remap_info->dst_stride * remap_info->height;
> > +		size += remap_info->dst_stride * remap_info->height;
> >   	}
> >   	/*
> > @@ -1157,11 +1172,19 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane)
> >   	tile_width = intel_tile_width_bytes(fb, color_plane);
> >   	if (is_ccs_modifier(fb->modifier)) {
> > +		/*
> > +		 * On ADL-P the stride must be either 8 tiles or a stride
> > +		 * that is aligned to 16 tiles, required by the 16 tiles =
> > +		 * 64 kbyte CCS AUX PTE granularity, allowing CCS FBs to be
> > +		 * remapped.
> > +		 */
> > +		if (IS_ALDERLAKE_P(dev_priv))
> > +			tile_width *= fb->pitches[0] <= tile_width * 8 ? 8 : 16;
> >   		/*
> >   		 * On TGL the surface stride must be 4 tile aligned, mapped by
> >   		 * one 64 byte cacheline on the CCS AUX surface.
> >   		 */
> > -		if (DISPLAY_VER(dev_priv) >= 12)
> > +		else if (DISPLAY_VER(dev_priv) >= 12)
> >   			tile_width *= 4;
> >   		/*
> >   		 * Display WA #0531: skl,bxt,kbl,glk
> > @@ -1415,17 +1438,6 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
> >   			}
> >   		}
> > -		/* TODO: Add POT stride remapping support for CCS formats as well. */
> > -		if (IS_ALDERLAKE_P(dev_priv) &&
> > -		    mode_cmd->modifier[i] != DRM_FORMAT_MOD_LINEAR &&
> > -		    !intel_fb_needs_pot_stride_remap(intel_fb) &&
> > -		    !is_power_of_2(mode_cmd->pitches[i])) {
> > -			drm_dbg_kms(&dev_priv->drm,
> > -				    "plane %d pitch (%d) must be power of two for tiled buffers\n",
> > -				    i, mode_cmd->pitches[i]);
> > -			goto err;
> > -		}
> > -
> >   		fb->obj[i] = &obj->base;
> >   	}
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > index de3ac58fceec3..9ba58d708e8ff 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > @@ -1373,13 +1373,28 @@ intel_rotate_pages(struct intel_rotation_info *rot_info,
> >   }
> >   static struct scatterlist *
> > -remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
> > +remap_pages(struct drm_i915_gem_object *obj,
> > +	    unsigned int offset, unsigned int alignment_pad,
> >   	    unsigned int width, unsigned int height,
> >   	    unsigned int src_stride, unsigned int dst_stride,
> >   	    struct sg_table *st, struct scatterlist *sg)
> >   {
> >   	unsigned int row;
> > +	if (alignment_pad) {
> > +		st->nents++;
> > +
> > +		/*
> > +		 * The DE ignores the PTEs for the padding tiles, the sg entry
> > +		 * here is just a convenience to indicate how many padding PTEs
> > +		 * to insert at this spot.
> > +		 */
> > +		sg_set_page(sg, NULL, alignment_pad * 4096, 0);
> > +		sg_dma_address(sg) = 0;
> > +		sg_dma_len(sg) = alignment_pad * 4096;
> > +		sg = sg_next(sg);
> > +	}
> > +
> >   	for (row = 0; row < height; row++) {
> >   		unsigned int left = width * I915_GTT_PAGE_SIZE;
> > @@ -1439,6 +1454,7 @@ intel_remap_pages(struct intel_remapped_info *rem_info,
> >   	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> >   	struct sg_table *st;
> >   	struct scatterlist *sg;
> > +	unsigned int gtt_offset = 0;
> >   	int ret = -ENOMEM;
> >   	int i;
> > @@ -1455,10 +1471,18 @@ intel_remap_pages(struct intel_remapped_info *rem_info,
> >   	sg = st->sgl;
> >   	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
> > -		sg = remap_pages(obj, rem_info->plane[i].offset,
> > +		unsigned int alignment_pad = 0;
> > +
> > +		if (rem_info->plane_alignment)
> > +			alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset;
> > +
> > +		sg = remap_pages(obj,
> > +				 rem_info->plane[i].offset, alignment_pad,
> >   				 rem_info->plane[i].width, rem_info->plane[i].height,
> >   				 rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride,
> >   				 st, sg);
> > +
> > +		gtt_offset += alignment_pad + rem_info->plane[i].dst_stride * rem_info->plane[i].height;
> >   	}
> >   	i915_sg_trim(st);
> > diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
> > index 995b502d7e5d9..80e93bf00f2e5 100644
> > --- a/drivers/gpu/drm/i915/i915_vma_types.h
> > +++ b/drivers/gpu/drm/i915/i915_vma_types.h
> > @@ -105,8 +105,9 @@ struct intel_remapped_plane_info {
> >   } __packed;
> >   struct intel_remapped_info {
> > -	struct intel_remapped_plane_info plane[2];
> > -	u32 unused_mbz;
> > +	struct intel_remapped_plane_info plane[4];
> > +	/* in gtt pages */
> > +	u32 plane_alignment;
> >   } __packed;
> >   struct intel_rotation_info {
> > @@ -129,7 +130,7 @@ static inline void assert_i915_gem_gtt_types(void)
> >   {
> >   	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 2 * sizeof(u32) + 8 * sizeof(u16));
> >   	BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int));
> > -	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 3 * sizeof(u32) + 8 * sizeof(u16));
> > +	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 5 * sizeof(u32) + 16 * sizeof(u16));
> >   	/* Check that rotation/remapped shares offsets for simplicity */
> >   	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
> > 
> 


More information about the Intel-gfx mailing list