[Intel-gfx] [RFC] drm/i915: Add variable gem object size support to i915

Siluvery, Arun arun.siluvery at linux.intel.com
Sat May 10 15:42:32 CEST 2014


On 09/05/2014 22:18, Volkin, Bradley D wrote:
> On Mon, Apr 28, 2014 at 08:01:29AM -0700, arun.siluvery at linux.intel.com wrote:
>> From: "Siluvery, Arun" <arun.siluvery at intel.com>
>>
>> This patch adds support to have gem objects of variable size.
>> The size of the gem object obj->size is always constant and this fact
>> is tightly coupled in the driver; this implementation allows to vary
>> its effective size using an interface similar to fallocate().
>>
>> A new ioctl() is introduced to mark a range as scratch/usable.
>> Once marked as scratch, associated backing store is released and the
>> region is filled with scratch pages. The region can also be unmarked
>> at a later point in which case new backing pages are created.
>> The range can be anywhere within the object space; an object can have
>> multiple ranges, possibly overlapping, forming a larger contiguous range.
>>
>> There is only a single scratch page, and the kernel allows writes to this
>> page; userspace needs to keep track of the scratch page ranges, otherwise
>> any subsequent writes to these pages will overwrite previous content.
>>
>> This feature is useful where the exact size of the object is not clear
>> at the time of its creation; in such cases we usually create an object
>> with more than the required size but end up using it partially.
>> In devices where there are tight memory constraints it would be useful
>> to release that additional space which is currently unused. Using this
>> interface the region can be simply marked as scratch which releases
>> its backing store thus reducing the memory pressure on the kernel.
>>
>> Many thanks to Daniel, ChrisW, Tvrtko, Bob for the idea and feedback
>> on this implementation.
>>
>> v2: fix holes in error handling and use consistent data types (Tvrtko)
>>   - If page allocation fails simply return error; do not try to invoke
>>     shrinker to free backing store.
>>   - Release new pages created by us in case of error during page allocation
>>     or sg_table update.
>>   - Use 64-bit data types for start and length values to avoid truncation.
>>
>> Change-Id: Id3339be95dbb6b5c69c39d751986c40ec0ccdaf8
>> Signed-off-by: Siluvery, Arun <arun.siluvery at intel.com>
>> ---
>>
>> Please let me know if I need to submit this as PATCH instead of RFC.
>> Since this is RFC I have included all changes as a single patch.
>
> Hi Arun,
>
> For a change of this size, one patch seems fine to me. I think RFC vs.
> PATCH is more a comment of what state you think the patch is in and
> what level of feedback you're looking for.
>
Hi Brad,

Thanks for your comments. The patch is complete and functional.
I am looking for any feedback to improve it further.

>>
>>   drivers/gpu/drm/i915/i915_dma.c |   1 +
>>   drivers/gpu/drm/i915/i915_drv.h |   2 +
>>   drivers/gpu/drm/i915/i915_gem.c | 205 ++++++++++++++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h     |  31 ++++++
>>   4 files changed, 239 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>> index 31c499f..3dd4b1a 100644
>> --- a/drivers/gpu/drm/i915/i915_dma.c
>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>> @@ -2000,6 +2000,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
>>   	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>   	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, \
>>   						DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +	DRM_IOCTL_DEF_DRV(I915_GEM_FALLOCATE, i915_gem_fallocate_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>   	DRM_IOCTL_DEF_DRV(I915_SET_PLANE_180_ROTATION, \
>>   		i915_set_plane_180_rotation, DRM_AUTH | DRM_UNLOCKED),
>>   	DRM_IOCTL_DEF_DRV(I915_ENABLE_PLANE_RESERVED_REG_BIT_2,
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 4069800..1f30fb6 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2210,6 +2210,8 @@ int i915_gem_get_tiling(struct drm_device *dev, void *data,
>>   int i915_gem_init_userptr(struct drm_device *dev);
>>   int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
>>   				struct drm_file *file);
>> +int i915_gem_fallocate_ioctl(struct drm_device *dev, void *data,
>> +				struct drm_file *file);
>>   int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>>   				struct drm_file *file_priv);
>>   int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 6153e01..a0188ee 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -317,6 +317,211 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
>>   			       args->size, &args->handle);
>>   }
>>
>> +static int i915_gem_obj_fallocate(struct drm_i915_gem_object *obj,
>> +				  bool mark_scratch, uint64_t start,
>> +				  uint64_t length)
>> +{
>> +	int i, j;
>> +	int ret;
>> +	uint32_t start_page, end_page;
>> +	uint32_t page_count;
>> +	gfp_t gfp;
>> +	bool update_sg_table = false;
>> +	unsigned long scratch_pfn;
>> +	struct page *scratch;
>> +	struct page **pages;
>> +	struct sg_table *sg = NULL;
>> +	struct sg_page_iter sg_iter;
>> +	struct address_space *mapping;
>> +	struct drm_i915_private *dev_priv;
>> +
>> +	dev_priv = obj->base.dev->dev_private;
>> +	start_page = start >> PAGE_SHIFT;
>> +	end_page = (start + length) >> PAGE_SHIFT;
>> +	page_count = obj->base.size >> PAGE_SHIFT;
>> +
>> +	pages = drm_malloc_ab(page_count, sizeof(*pages));
>> +	if (pages == NULL)
>> +		return -ENOMEM;
>> +
>> +	i = 0;
>> +	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
>> +		pages[i] = sg_page_iter_page(&sg_iter);
>> +		i++;
>> +	}
>> +
>> +	mapping = file_inode(obj->base.filp)->i_mapping;
>> +	gfp = mapping_gfp_mask(mapping);
>> +	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
>> +	gfp &= ~(__GFP_IO | __GFP_WAIT);
>> +	scratch = dev_priv->gtt.base.scratch.page;
>> +	scratch_pfn = page_to_pfn(scratch);
>> +
>> +	if (mark_scratch) {
>> +		/* invalidate page range */
>> +		for (i = start_page; i < end_page; ++i) {
>> +			if (scratch_pfn == page_to_pfn(pages[i]))
>> +				continue;
>> +
>> +			update_sg_table = true;
>> +			drm_clflush_pages((pages + i), 1);
>> +			if (obj->dirty)
>> +				set_page_dirty(pages[i]);
>> +			page_cache_release(pages[i]);
>> +			pages[i] = scratch;
>> +		}
>> +	} else {
>> +		struct page *page;
>> +		/* allocate new pages */
>> +		for (i = start_page; i < end_page; ++i) {
>> +			if (page_to_pfn(pages[i]) != scratch_pfn)
>> +				continue;
>> +
>> +			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
>> +			if (IS_ERR(page)) {
>> +				ret = PTR_ERR(page);
>> +				goto err_pages;
>> +			}
>> +
>> +			update_sg_table = true;
>> +			pages[i] = page;
>> +		}
>> +	}
>> +
>> +	if (update_sg_table == false) {
>> +		ret = 0;
>> +		goto out;
>> +	}
>> +
>> +	/**
>> +	 * easier to replace the existing sg_table with
>> +	 * the new one instead of modifying it
>> +	 */
>> +	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
>> +	if (!sg) {
>> +		ret = -ENOMEM;
>> +		goto err_pages;
>> +	}
>> +	ret = sg_alloc_table_from_pages(sg, pages, page_count, 0,
>> +					page_count << PAGE_SHIFT, gfp);
>> +	if (ret)
>> +		goto err_alloc_table;
>> +
>> +	sg_free_table(obj->pages);
>> +	kfree(obj->pages);
>> +	obj->pages = sg;
>> +
>> +	return 0;
>> +
>> +err_alloc_table:
>> +	kfree(sg);
>> +err_pages:
>> +	/*
>> +	 * In case of an error we should keep the obj in its previous state.
>> +	 *
>> +	 * case 1: when replacing obj pages with scratch pages
>> +	 *   No action required because obj has all valid pages and
>> +	 *   we cannot release the scratch page as it is used in
>> +	 *   other places.
>> +	 *
>> +	 * case 2: when replacing scratch pages with real backing store
>> +	 *   In this case free only the new pages created by us
>> +	 */
>> +	if (!mark_scratch) {
>> +		for (j = start_page; j < i; ++j)
>> +			page_cache_release(pages[j]);
>> +	}
>> +out:
>> +	if (pages)
>> +		drm_free_large(pages);
>> +
>> +	return ret;
>> +}
>> +
>> +/**
>> + * Changes the effective size of an existing gem object.
>> + * The object size is always constant and this fact is tightly
>> + * coupled in the driver. This ioctl() allows user to define
>> + * certain ranges in the obj to be marked as usable/scratch
>> + * thus modifying the effective size of the object used.
>> + * mark_scratch: specified range of pages are replaced by scratch page.
>> + * unmark_scratch: scratch pages are replaced by real backing store.
>> + */
>> +int i915_gem_fallocate_ioctl(struct drm_device *dev,
>> +			     void *data, struct drm_file *file)
>> +{
>> +	int ret;
>> +	bool mark_scratch = false;
>> +	uint64_t start, length;
>> +	struct i915_vma *vma;
>> +	struct drm_i915_private *dev_priv;
>> +	struct drm_i915_gem_object *obj;
>> +	struct i915_address_space *vm;
>> +	struct drm_i915_gem_fallocate *args = data;
>> +
>> +	if (!((args->mode & I915_GEM_FALLOC_MARK_SCRATCH) ^
>> +	       ((args->mode & I915_GEM_FALLOC_UNMARK_SCRATCH) >> 1)))
>> +		return -EINVAL;
>> +
>> +	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
>> +	if (&obj->base == NULL)
>> +		return -ENOENT;
>
> Since the fallocate() code requires shmem backing, we need this check:
>
> 	if (!obj->base.filp)
> 		return -EINVAL;

ok, I will add this check.
>
>> +
>> +	start = roundup(args->start, PAGE_SIZE);
>> +	length = roundup(args->length, PAGE_SIZE);
>
> Rounding up the start to avoid throwing away valid data on the start page
> makes sense to me. But shouldn't we then round the length down to avoid
> throwing away valid data on the end page, after the specified range?
>
> Or we could just require page aligned start and length and return error
> otherwise; that's what the userptr ioctl does.

I agree page aligned start, length is better.
>
>> +	if (length == 0
>> +	    || length > obj->base.size
>> +	    || start > obj->base.size
>> +	    || (start + length) > obj->base.size)
>> +		return -EINVAL;
>> +
>> +	dev_priv = dev->dev_private;
>> +	vm = &dev_priv->gtt.base;
>> +
>> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
>
> Should we use i915_mutex_lock_interruptible() here? I'm not entirely clear
> on when that's required vs. just mutex_lock_interruptible.
>
i915_mutex_lock_interruptible() checks for a pending GPU reset before
acquiring the mutex; I will use it instead, as it is preferred in all cases.

>> +	if (ret)
>> +		return ret;
>> +
>> +	if (!i915_gem_obj_bound(obj, vm)) {
>> +		ret = i915_gem_object_bind_to_vm(obj, vm, 0, true, false);
>> +		if (ret)
>> +			goto unlock;
>> +
>> +		if (!dev_priv->mm.aliasing_ppgtt)
>> +			i915_gem_gtt_bind_object(obj, obj->cache_level);
>> +	}
>> +
>> +	drm_gem_object_reference(&obj->base);
>> +
>> +	vma = i915_gem_obj_to_vma(obj, vm);
>> +	if (!vma) {
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	ret = i915_vma_unbind(vma);
>> +	if (ret)
>> +		goto out;
>
> Hmm, can you elaborate on the need for this section a bit? I don't
> really follow what we're doing here. I can see needing to unbind an
> object that is bound in order to change the page table entries. I
> guess I just don't understand the specific implementation.
>
> For example, why do we need to bind an unbound object just to unbind
> it again? Should we even allow fallocate() on such an object? And we
> only bind/unbind from GGTT; what about PPGTTs?
>
This is the bit I am not clear about either.
This is mainly added to cover the case where an object is created but
not yet bound. I don't know whether that should be allowed or not.
I can change this if we should not allow fallocate on unbound objects.

bind/unbind functions are already considering aliased ppgtt case.

>> +
>> +	mark_scratch =
>> +		(args->mode & I915_GEM_FALLOC_MARK_SCRATCH) ? true : false;
>> +	ret = i915_gem_obj_fallocate(obj, mark_scratch, start, length);
>> +	if (ret) {
>> +		DRM_ERROR("fallocating specified obj range failed\n");
>> +		goto out;
>> +	}
>> +
>> +	ret = i915_gem_object_bind_to_vm(obj, vm, 0, true, false);
>> +	if (ret)
>> +		DRM_ERROR("object couldn't be bound after falloc\n");
>> +
>> +out:
>> +	drm_gem_object_unreference(&obj->base);
>> +unlock:
>> +	mutex_unlock(&dev->struct_mutex);
>> +	return ret;
>> +}
>> +
>>   static inline int
>>   __copy_to_user_swizzled(char __user *cpu_vaddr,
>>   			const char *gpu_vaddr, int gpu_offset,
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index aa8469e..0d63fc8 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -275,6 +275,7 @@ struct csc_coeff {
>>   #define DRM_I915_GET_RESET_STATS	0x32
>>   #define DRM_I915_SET_PLANE_ZORDER	0x33
>>   #define DRM_I915_GEM_USERPTR		0x34
>> +#define DRM_I915_GEM_FALLOCATE		0x35
>>   #define DRM_I915_SET_PLANE_180_ROTATION 0x36
>>   #define DRM_I915_ENABLE_PLANE_RESERVED_REG_BIT_2	0x37
>>   #define DRM_I915_SET_CSC		0x39
>> @@ -339,6 +340,9 @@ struct csc_coeff {
>>   #define DRM_IOCTL_I915_GEM_USERPTR \
>>   		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, \
>>   				struct drm_i915_gem_userptr)
>> +#define DRM_IOCTL_I915_GEM_FALLOCATE \
>> +		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_FALLOCATE, \
>> +				struct drm_i915_gem_fallocate)
>
> We're not returning any data in the struct, so no need for DRM_IOWR.
> Just DRM_IOW should be fine.

will fix it in next revision.

>
>>   #define DRM_IOCTL_I915_SET_PLANE_ALPHA		\
>>   			DRM_IOW(DRM_COMMAND_BASE + DRM_I915_SET_PLANE_ALPHA, \
>>   			struct drm_i915_set_plane_alpha)
>> @@ -523,6 +527,33 @@ struct drm_i915_gem_create {
>>   	__u32 pad;
>>   };
>>
>> +struct drm_i915_gem_fallocate {
>> +	/**
>> +	 * Start position of the range
>> +	 *
>> +	 * If the given value is not page-aligned it will be rounded internally.
>> +	 */
>> +	__u64 start;
>> +	/**
>> +	 * Length of the range
>> +	 *
>> +	 * If the given value is not page-aligned it will be rounded internally.
>> +	 */
>> +	__u64 length;
>> +	/**
>> +	 * Mode applied to the range
>> +	 */
>> +	__u32 mode;
>> +#define I915_GEM_FALLOC_MARK_SCRATCH        0x01
>> +#define I915_GEM_FALLOC_UNMARK_SCRATCH      0x02
>> +	/**
>> +	 * Returned handle for the object.
>> +	 *
>> +	 * Object handles are nonzero.
>> +	 */
>
> We're not actually returning the handle, it's only an input.

will fix it in the next revision.

>
> Thanks,
> Brad
>
>> +	__u32 handle;
>> +};
>> +
>>   struct drm_i915_gem_pread {
>>   	/** Handle for the object being read. */
>>   	__u32 handle;
>> --
>> 1.9.2
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>




More information about the Intel-gfx mailing list