[PATCH v4 7/7] drm/panthor: Add support for repeated mappings

Caterina Shablia caterina.shablia at collabora.com
Tue Jul 15 15:17:50 UTC 2025


On Friday, 11 July 2025 at 16:03:26 (Central European Summer Time),
Steven Price wrote:
> On 07/07/2025 18:04, Caterina Shablia wrote:
> > From: Boris Brezillon <boris.brezillon at collabora.com>
> > 
> > This allows us to optimize mapping of a relatively small
> > portion of a BO over and over in a large VA range, which
> > is useful to support Vulkan sparse bindings in an efficient
> > way.
> > 
> > Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
> > Co-developed-by: Caterina Shablia <caterina.shablia at collabora.com>
> > Signed-off-by: Caterina Shablia <caterina.shablia at collabora.com>
> 
> This looks like the right sort of shape. From a uAPI perspective I'm
> not sure whether u32 is the right type for bo_repeat_range. While I
> can't immediately see a use for a large repeat range, it seems a
> little silly to close it off when we're going to have padding afterwards
> anyway. Obviously the kernel would reject large values for now because
> the internal APIs are only u32, but it would let the limit be lifted
> without a uAPI change if we ever discover a use case.
> 
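Agreed on widening it. To sketch what the ingress check could look like
(hypothetical snippet, assuming bo_repeat_range becomes __u64 in the uAPI
while the internal plumbing stays u32):

	if (op->bo_repeat_range > U32_MAX)
		return -EINVAL;

That keeps the uAPI open-ended without committing the internals to 64-bit
today.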
> > ---
> > 
> >  drivers/gpu/drm/panthor/panthor_drv.c |  3 +-
> >  drivers/gpu/drm/panthor/panthor_mmu.c | 78 ++++++++++++++++++++++++---
> >  include/uapi/drm/panthor_drm.h        | 23 ++++++++
> >  3 files changed, 95 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> > index 1116f2d2826e..585c07b07c42 100644
> > --- a/drivers/gpu/drm/panthor/panthor_drv.c
> > +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> > @@ -1608,6 +1608,7 @@ static void panthor_debugfs_init(struct drm_minor *minor)
> > 
> >   * - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag
> >   * - 1.4 - adds DRM_IOCTL_PANTHOR_BO_SET_LABEL ioctl
> >   * - 1.5 - adds DRM_PANTHOR_SET_USER_MMIO_OFFSET ioctl
> > + * - 1.6 - adds DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT flag
> >   */
> >  
> >  static const struct drm_driver panthor_drm_driver = {
> >  	.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ |
> > 
> > @@ -1621,7 +1622,7 @@ static const struct drm_driver panthor_drm_driver = {
> > 
> >  	.name = "panthor",
> >  	.desc = "Panthor DRM driver",
> >  	.major = 1,
> > -	.minor = 5,
> > +	.minor = 6,
> > 
> >  	.gem_create_object = panthor_gem_create_object,
> >  	.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
> > 
> > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> > index f0a22b775958..4ce9fff67d69 100644
> > --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> > @@ -202,6 +202,9 @@ struct panthor_vm_op_ctx {
> > 
> >  		/** @map.bo_offset: Offset in the buffer object. */
> >  		u64 bo_offset;
> > 
> > +		/** @map.bo_repeat_range: Repeated BO range. */
> > +		u32 bo_repeat_range;
> > +
> > 
> >  		/**
> >  		 * @map.sgt: sg-table pointing to pages backing the GEM object.
> >  		 *
> > 
> > @@ -1007,6 +1010,26 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
> > 
> >  	return 0;
> >  }
> > 
> > +static int
> > +panthor_vm_repeated_map_pages(struct panthor_vm *vm, u64 iova, int prot,
> > +			      struct sg_table *sgt, u64 offset, u64 size,
> > +			      u64 count)
> > +{
> > +	/* FIXME: we really need to optimize this at the io_pgtable level. */
> 
> Do you have plans for optimizing this? 
I personally don't have any plans, no, but maybe Boris does. I'll forward this 
question to him once he's back from his vacation.
> How bad is the performance
> without optimizing?
It's better than the alternative of poking vm_bind with a morbillion
drm_panthor_vm_bind_ops. More seriously, I don't really have any workloads
besides the VK CTS to measure with, for now. There's some stuff we should try
in panvk first, like using a 2M dummy_page and doing some gymnastics when
mapping it so we get huge mappings, which will hopefully lessen the pressure
on this loop.
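For a rough sense of scale: filling a 1 GiB sparse VA region from a 4 KiB
dummy page takes 1 GiB / 4 KiB = 262144 iterations of that loop, whereas a
2 MiB dummy page mapped as a huge page would need only 512.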
> 
> > +	for (u64 i = 0; i < count; i++) {
> > +		int ret;
> > +
> > +		ret = panthor_vm_map_pages(vm, iova + (size * i), prot,
> > +					   sgt, offset, size);
> > +		if (ret) {
> > +			panthor_vm_unmap_pages(vm, iova, size * i);
> > +			return ret;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
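To spell out what the helper produces: with offset = 0, size = 4096 and
count = 4, it creates four VA ranges [iova, iova + 4K), [iova + 4K, iova + 8K),
[iova + 8K, iova + 12K) and [iova + 12K, iova + 16K), all backed by the same
BO range [offset, offset + 4K).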
> > 
> >  static int flags_to_prot(u32 flags)
> >  {
> >  	int prot = 0;
> > 
> > @@ -1203,12 +1226,14 @@ panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx)
> > 
> >  	(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \
> >  	 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \
> >  	 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \
> > +	 DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT | \
> >  	 DRM_PANTHOR_VM_BIND_OP_TYPE_MASK)
> > 
> >  static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
> >  					 struct panthor_vm *vm,
> >  					 struct panthor_gem_object *bo,
> >  					 u64 offset,
> > +					 u32 repeat_range,
> >  					 u64 size, u64 va,
> >  					 u32 flags)
> >  {
> > 
> > @@ -1224,9 +1249,22 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
> > 
> >  	    (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) !=
> >  	    DRM_PANTHOR_VM_BIND_OP_TYPE_MAP)
> >  		return -EINVAL;
> > 
> > -	/* Make sure the VA and size are aligned and in-bounds. */
> > -	if (size > bo->base.base.size || offset > bo->base.base.size - size)
> > -		return -EINVAL;
> > +	if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT)) {
> > +		/* Make sure the VA and size are in-bounds. */
> > +		if (size > bo->base.base.size || offset > bo->base.base.size - size)
> > +			return -EINVAL;
> > +	} else {
> > +		/* Make sure the repeat_range is in-bounds. */
> > +		if (repeat_range > bo->base.base.size ||
> > +		    offset > bo->base.base.size - repeat_range)
> > +			return -EINVAL;
> > +
> > +		/* Make sure size is a multiple of repeat_range */
> > +
> > +		u64 repeat_count = size;
> > +
> > +		if (do_div(repeat_count, repeat_range))
> > +			return -EINVAL;
> > +	}
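For anyone who doesn't have do_div() memorized: it divides its 64-bit first
argument in place and returns the remainder, which is why it doubles as a
divisibility check here. A minimal illustration (plain sketch, not driver
code):

	#include <asm/div64.h>	/* do_div() */

	u64 repeat_count = size;	/* say size == 3 * repeat_range */
	u32 rem = do_div(repeat_count, repeat_range);
	/* repeat_count now holds the quotient (3), rem the remainder (0);
	 * a non-zero remainder means size isn't a multiple of repeat_range. */

Note this assumes repeat_range is non-zero; with the REPEAT flag set and
bo_repeat_range == 0 the do_div() above would divide by zero, so a zero range
is worth rejecting explicitly.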
> > 
> >  	/* If the BO has an exclusive VM attached, it can't be mapped to other VMs. */
> >  	if (bo->exclusive_vm_root_gem &&
> > 
> > @@ -1295,6 +1333,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
> > 
> >  		drm_gem_shmem_unpin(&bo->base);
> > 
> >  	op_ctx->map.bo_offset = offset;
> > +	op_ctx->map.bo_repeat_range = repeat_range;
> > 
> >  	/* L1, L2 and L3 page tables.
> >  	 * We could optimize L3 allocation by iterating over the sgt and merging
> > 
> > @@ -2112,9 +2151,22 @@ static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv)
> > 
> >  	panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS);
> > 
> > -	ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags),
> > -				   op_ctx->map.sgt, op->map.gem.offset,
> > -				   op->map.va.range);
> > +	if (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT) {
> > +		u64 repeat_count = op->map.va.range;
> > +
> > +		do_div(repeat_count, op->map.gem.range);
> > +		ret = panthor_vm_repeated_map_pages(vm, op->map.va.addr,
> > +						    flags_to_prot(vma->flags),
> > +						    op_ctx->map.sgt,
> > +						    op->map.gem.offset,
> > +						    op->map.gem.range,
> > +						    repeat_count);
> > +	} else {
> > +		ret = panthor_vm_map_pages(vm, op->map.va.addr,
> > +					   flags_to_prot(vma->flags),
> > +					   op_ctx->map.sgt, op->map.gem.offset,
> > +					   op->map.va.range);
> > +	}
> > 
> >  	if (ret)
> >  		return ret;
> > 
> > @@ -2237,7 +2289,7 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
> > 
> >  	switch (op_type) {
> >  	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: {
> > -		const struct drm_gpuvm_map_req map_req = {
> > +		struct drm_gpuvm_map_req map_req = {
> >  			.va.addr = op->va.addr,
> >  			.va.range = op->va.range,
> >  			.gem.obj = op->map.vm_bo->obj,
> > 
> > @@ -2249,6 +2301,11 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
> > 
> >  			break;
> >  		}
> > 
> > +		if (op->flags & DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT) {
> > +			map_req.flags |= DRM_GPUVA_REPEAT;
> > +			map_req.gem.range = op->map.bo_repeat_range;
> > +		}
> > +
> >  		ret = drm_gpuvm_sm_map(&vm->base, vm, &map_req);
> >  		break;
> >  	}
> > 
> > @@ -2497,6 +2554,7 @@ panthor_vm_bind_prepare_op_ctx(struct drm_file *file,
> > 
> >  		ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm,
> >  						    gem ? to_panthor_bo(gem) : NULL,
> >  						    op->bo_offset,
> > +						    op->bo_repeat_range,
> >  						    op->size,
> >  						    op->va,
> >  						    op->flags);
> > 
> > @@ -2698,7 +2756,11 @@ int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo
> > 
> >  	struct panthor_vm_op_ctx op_ctx;
> >  	int ret;
> > 
> > -	ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags);
> > +	/* TODO: would be nice to replace with assert instead */
> 
> If you don't expect this to happen then this can be an "if (WARN_ON(...))".
> 
> > +	if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT)
> > +		return -EINVAL;
> > +
> > +	ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, 0, size, va, flags);
> >  	if (ret)
> >  		return ret;
> > 
> > diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> > index e1f43deb7eca..ad278bc234b0 100644
> > --- a/include/uapi/drm/panthor_drm.h
> > +++ b/include/uapi/drm/panthor_drm.h
> > @@ -496,6 +496,17 @@ enum drm_panthor_vm_bind_op_flags {
> > 
> >  	 */
> >  	DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2,
> > 
> > +	/**
> > +	 * @DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT: Repeat a BO range
> > +	 *
> > +	 * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP.
> > +	 *
> > +	 * When this is set, a BO range is repeated over the VA range.
> > +	 * drm_panthor_vm_bind_op::bo_repeat_range defines the size of the
> > +	 * BO range to repeat.
> > +	 */
> > +	DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT = 1 << 3,
> > +
> > 
> >  	/**
> >  	 * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of operation.
> >  	 */
> > 
> > @@ -560,6 +571,18 @@ struct drm_panthor_vm_bind_op {
> > 
> >  	 */
> >  	struct drm_panthor_obj_array syncs;
> > 
> > +	/**
> > +	 * @bo_repeat_range: The size of the range to be repeated.
> > +	 *
> > +	 * Must be zero if DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT is not set in
> > +	 * flags.
> > +	 *
> > +	 * Size must be a multiple of bo_repeat_range.
> > +	 */
> > +	__u32 bo_repeat_range;
> > +
> > +	/** @pad: Padding field. MBZ. */
> > +	__u32 pad;
> 
> If we're going to have the padding then the kernel needs to check that
> this padding is zero, so that it can be available for future use.
I decided to go with your suggestion and changed bo_repeat_range to a __u64,
rejecting vm_bind ops with values above 2^32-1 for now.
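For the record, this is roughly how I expect userspace to use the flag
(untested sketch; dummy_bo_handle and sparse_va are placeholders, and
bo_repeat_range is shown with the __u64 layout discussed above):

	struct drm_panthor_vm_bind_op op = {
		.flags = DRM_PANTHOR_VM_BIND_OP_TYPE_MAP |
			 DRM_PANTHOR_VM_BIND_OP_MAP_REPEAT,
		.bo_handle = dummy_bo_handle,	/* BO providing the repeated pages */
		.bo_offset = 0,
		.bo_repeat_range = 4096,	/* repeat the first 4K of the BO */
		.va = sparse_va,		/* start of the VA region to fill */
		.size = 64 * 1024 * 1024,	/* must be a multiple of bo_repeat_range */
	};

A single op like this replaces 16384 individual 4K vm_bind map ops.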
> 
> Steve
> 
> >  };
> >  
> >  /**