[Intel-gfx] [PATCH 22/29] drm/i915: Handle stolen objects in pwrite

Daniel Vetter daniel at ffwll.ch
Mon Aug 20 21:56:08 CEST 2012


On Sat, Aug 11, 2012 at 03:41:21PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

What about putting kmap/unmap abstractions into obj->ops (like the dma_buf
interface already has)? Since the pwrite/pread code is already rather
branch heavy, I hope we don't see the overhead of the indirect call even
in microbenchmarks (haven't checked). And this way we would also neatly
wrap up dma_bufs for pwrite (if anyone ever really wants that ...).

The kmap(_atomic) for stolen-memory-backed objects would boil down to doing
the pointer arithmetic; kunmap would just be a no-op.

Cheers, Daniel
> ---
>  drivers/gpu/drm/i915/i915_gem.c |  169 +++++++++++++++++++++++++--------------
>  1 file changed, 111 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 552f95b..a2fb2aa 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -664,19 +664,17 @@ out:
>   * needs_clflush_before is set and flushes out any written cachelines after
>   * writing if needs_clflush is set. */
>  static int
> -shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
>  		  char __user *user_data,
>  		  bool page_do_bit17_swizzling,
>  		  bool needs_clflush_before,
>  		  bool needs_clflush_after)
>  {
> -	char *vaddr;
>  	int ret;
>  
>  	if (unlikely(page_do_bit17_swizzling))
>  		return -EINVAL;
>  
> -	vaddr = kmap_atomic(page);
>  	if (needs_clflush_before)
>  		drm_clflush_virt_range(vaddr + shmem_page_offset,
>  				       page_length);
> @@ -686,7 +684,6 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  	if (needs_clflush_after)
>  		drm_clflush_virt_range(vaddr + shmem_page_offset,
>  				       page_length);
> -	kunmap_atomic(vaddr);
>  
>  	return ret ? -EFAULT : 0;
>  }
> @@ -694,16 +691,14 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  /* Only difference to the fast-path function is that this can handle bit17
>   * and uses non-atomic copy and kmap functions. */
>  static int
> -shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
>  		  char __user *user_data,
>  		  bool page_do_bit17_swizzling,
>  		  bool needs_clflush_before,
>  		  bool needs_clflush_after)
>  {
> -	char *vaddr;
>  	int ret;
>  
> -	vaddr = kmap(page);
>  	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
>  		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
>  					     page_length,
> @@ -720,7 +715,6 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
>  		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
>  					     page_length,
>  					     page_do_bit17_swizzling);
> -	kunmap(page);
>  
>  	return ret ? -EFAULT : 0;
>  }
> @@ -731,6 +725,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  		      struct drm_i915_gem_pwrite *args,
>  		      struct drm_file *file)
>  {
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  	ssize_t remain;
>  	loff_t offset;
>  	char __user *user_data;
> @@ -770,74 +765,132 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	if (ret)
>  		return ret;
>  
> -	i915_gem_object_pin_pages(obj);
> -
>  	offset = args->offset;
>  	obj->dirty = 1;
>  
> -	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> -		struct page *page;
> -		int partial_cacheline_write;
> +	if (obj->stolen) {
> +		while (remain > 0) {
> +			char *vaddr;
> +			int partial_cacheline_write;
>  
> -		if (i < offset >> PAGE_SHIFT)
> -			continue;
> +			/* Operation in this page
> +			 *
> +			 * shmem_page_offset = offset within page in shmem file
> +			 * page_length = bytes to copy for this page
> +			 */
> +			shmem_page_offset = offset_in_page(offset);
>  
> -		if (remain <= 0)
> -			break;
> +			page_length = remain;
> +			if ((shmem_page_offset + page_length) > PAGE_SIZE)
> +				page_length = PAGE_SIZE - shmem_page_offset;
>  
> -		/* Operation in this page
> -		 *
> -		 * shmem_page_offset = offset within page in shmem file
> -		 * page_length = bytes to copy for this page
> -		 */
> -		shmem_page_offset = offset_in_page(offset);
> +			/* If we don't overwrite a cacheline completely we need to be
> +			 * careful to have up-to-date data by first clflushing. Don't
> +			 * overcomplicate things and flush the entire patch. */
> +			partial_cacheline_write = needs_clflush_before &&
> +				((shmem_page_offset | page_length)
> +				 & (boot_cpu_data.x86_clflush_size - 1));
>  
> -		page_length = remain;
> -		if ((shmem_page_offset + page_length) > PAGE_SIZE)
> -			page_length = PAGE_SIZE - shmem_page_offset;
> +			vaddr = (char *)(dev_priv->mm.stolen_base + obj->stolen->start + offset);
> +			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> +				((uintptr_t)vaddr & (1 << 17)) != 0;
>  
> -		/* If we don't overwrite a cacheline completely we need to be
> -		 * careful to have up-to-date data by first clflushing. Don't
> -		 * overcomplicate things and flush the entire patch. */
> -		partial_cacheline_write = needs_clflush_before &&
> -			((shmem_page_offset | page_length)
> -				& (boot_cpu_data.x86_clflush_size - 1));
> +			ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
>  
> -		page = sg_page(sg);
> -		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> -			(page_to_phys(page) & (1 << 17)) != 0;
> +			if (ret == 0)
> +				goto next_stolen;
>  
> -		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
> -					user_data, page_do_bit17_swizzling,
> -					partial_cacheline_write,
> -					needs_clflush_after);
> -		if (ret == 0)
> -			goto next_page;
> +			hit_slowpath = 1;
> +			mutex_unlock(&dev->struct_mutex);
>  
> -		hit_slowpath = 1;
> -		mutex_unlock(&dev->struct_mutex);
> -		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
> -					user_data, page_do_bit17_swizzling,
> -					partial_cacheline_write,
> -					needs_clflush_after);
> +			ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
>  
> -		mutex_lock(&dev->struct_mutex);
> +			mutex_lock(&dev->struct_mutex);
> +			if (ret)
> +				goto out;
>  
> -next_page:
> -		set_page_dirty(page);
> -		mark_page_accessed(page);
> +next_stolen:
> +			remain -= page_length;
> +			user_data += page_length;
> +			offset += page_length;
> +		}
> +	} else {
> +		i915_gem_object_pin_pages(obj);
>  
> -		if (ret)
> -			goto out;
> +		for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +			struct page *page;
> +			char *vaddr;
> +			int partial_cacheline_write;
>  
> -		remain -= page_length;
> -		user_data += page_length;
> -		offset += page_length;
> +			if (i < offset >> PAGE_SHIFT)
> +				continue;
> +
> +			if (remain <= 0)
> +				break;
> +
> +			/* Operation in this page
> +			 *
> +			 * shmem_page_offset = offset within page in shmem file
> +			 * page_length = bytes to copy for this page
> +			 */
> +			shmem_page_offset = offset_in_page(offset);
> +
> +			page_length = remain;
> +			if ((shmem_page_offset + page_length) > PAGE_SIZE)
> +				page_length = PAGE_SIZE - shmem_page_offset;
> +
> +			/* If we don't overwrite a cacheline completely we need to be
> +			 * careful to have up-to-date data by first clflushing. Don't
> +			 * overcomplicate things and flush the entire patch. */
> +			partial_cacheline_write = needs_clflush_before &&
> +				((shmem_page_offset | page_length)
> +				 & (boot_cpu_data.x86_clflush_size - 1));
> +
> +			page = sg_page(sg);
> +			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> +				(page_to_phys(page) & (1 << 17)) != 0;
> +
> +			vaddr = kmap_atomic(page);
> +			ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
> +
> +			kunmap_atomic(vaddr);
> +
> +			if (ret == 0)
> +				goto next_page;
> +
> +			hit_slowpath = 1;
> +			mutex_unlock(&dev->struct_mutex);
> +
> +			vaddr = kmap(page);
> +			ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
> +			kunmap(page);
> +
> +			mutex_lock(&dev->struct_mutex);
> +			if (ret)
> +				goto out_unpin;
> +
> +next_page:
> +			remain -= page_length;
> +			user_data += page_length;
> +			offset += page_length;
> +		}
> +out_unpin:
> +		i915_gem_object_unpin_pages(obj);
>  	}
>  
>  out:
> -	i915_gem_object_unpin_pages(obj);
> -
>  	if (hit_slowpath) {
>  		/* Fixup: Kill any reinstated backing storage pages */
>  		if (obj->madv == __I915_MADV_PURGED)
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48



More information about the Intel-gfx mailing list