[Intel-gfx] [PATCH 2/2] drm/i915: Make the physical object coherent with GTT

Ville Syrjälä ville.syrjala at linux.intel.com
Fri May 2 19:17:22 CEST 2014


On Fri, Apr 18, 2014 at 08:27:04AM +0100, Chris Wilson wrote:
> Currently objects for which the hardware needs a contiguous physical
> address are allocated a shadow backing storage to satisfy the contraint.
> This shadow buffer is not wired into the normal obj->pages and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object via either a GTT mmaping of or by rendering
> into the GEM bo. However, keeping the CPU mmap of the shmemfs backing
> storage coherent with the contiguous shadow is not yet possible.
> Fortuituously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
> 
> This allows the physical constraint of the GEM object to be transparent
> to userspace and allow it to efficiently render into or update them via
> the GTT and GPU.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>  drivers/gpu/drm/i915/i915_gem.c | 182 +++++++++++++++++++++++++++-------------
>  include/uapi/drm/i915_drm.h     |   1 +
>  3 files changed, 129 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 12571aac7516..dd43fdc736ac 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1022,6 +1022,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_CMD_PARSER_VERSION:
>  		value = i915_cmd_parser_get_version();
>  		break;
> +	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> +		value = 1;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e302a23031fe..3eb5c07e1018 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -207,41 +207,129 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  {
> -	drm_dma_handle_t *phys = obj->phys_handle;
> +	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +	char *vaddr = obj->phys_handle->vaddr;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	int i;
>  
> -	if (!phys)
> -		return;
> +	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +		struct page *page;
> +		char *src;
> +
> +		page = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +
> +		src = kmap_atomic(page);
> +		memcpy(vaddr, src, PAGE_SIZE);
> +		kunmap_atomic(src);
> +
> +		page_cache_release(page);
> +		vaddr += PAGE_SIZE;
> +	}
> +
> +	drm_clflush_virt_range(obj->phys_handle->vaddr, obj->base.size);

No longer wc/uc so now we flush.

> +	i915_gem_chipset_flush(obj->base.dev);
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return -ENOMEM;
> +
> +	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +		kfree(st);
> +		return -ENOMEM;
> +	}
> +
> +	sg = st->sgl;
> +	sg->offset = 0;
> +	sg->length = obj->base.size;
> +
> +	sg_dma_address(sg) = obj->phys_handle->busaddr;
> +	sg_dma_len(sg) = obj->base.size;
>  
> -	if (obj->madv == I915_MADV_WILLNEED) {
> +	obj->pages = st;
> +	obj->has_dma_mapping = true;
> +	return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> +	int ret;
> +
> +	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> +	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +	if (ret) {
> +		/* In the event of a disaster, abandon all caches and
> +		 * hope for the best.
> +		 */
> +		WARN_ON(ret != -EIO);
> +		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	}
> +
> +	if (obj->madv == I915_MADV_DONTNEED)
> +		obj->dirty = 0;
> +
> +	if (obj->dirty) {
>  		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> -		char *vaddr = phys->vaddr;
> +		char *vaddr = obj->phys_handle->vaddr;
>  		int i;
>  
>  		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -			struct page *page = shmem_read_mapping_page(mapping, i);
> -			if (!IS_ERR(page)) {
> -				char *dst = kmap_atomic(page);
> -				memcpy(dst, vaddr, PAGE_SIZE);
> -				kunmap_atomic(dst);
> +			struct page *page;
> +			char *dst;
> +
> +			page = shmem_read_mapping_page(mapping, i);
> +			if (IS_ERR(page))
> +				continue;
>  
> -				drm_clflush_pages(&page, 1);
> +			dst = kmap_atomic(page);
> +			memcpy(dst, vaddr, PAGE_SIZE);
> +			kunmap_atomic(dst);
>  
> -				set_page_dirty(page);
> +			set_page_dirty(page);
> +			if (obj->madv == I915_MADV_WILLNEED)
>  				mark_page_accessed(page);
> -				page_cache_release(page);
> -			}
> +			page_cache_release(page);
>  			vaddr += PAGE_SIZE;
>  		}
> -		i915_gem_chipset_flush(obj->base.dev);
> +		obj->dirty = 0;
>  	}
>  
> -#ifdef CONFIG_X86
> -	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	drm_pci_free(obj->base.dev, phys);

This gets leaked.

I suppose you should still keep i915_gem_object_detach_phys() around.

> -	obj->phys_handle = NULL;
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +
> +	obj->has_dma_mapping = false;
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> +	.get_pages = i915_gem_object_get_pages_phys,
> +	.put_pages = i915_gem_object_put_pages_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma, *next;
> +	int ret;
> +
> +	drm_gem_object_reference(&obj->base);
> +	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> +		if (i915_vma_unbind(vma))
> +			break;
> +
> +	ret = i915_gem_object_put_pages(obj);
> +	drm_gem_object_unreference(&obj->base);
> +
> +	return ret;
>  }
>  
>  int
> @@ -249,9 +337,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  			    int align)
>  {
>  	drm_dma_handle_t *phys;
> -	struct address_space *mapping;
> -	char *vaddr;
> -	int i;
> +	int ret;
>  
>  	if (obj->phys_handle) {
>  		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -266,36 +352,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  	if (obj->base.filp == NULL)
>  		return -EINVAL;
>  
> +	ret = drop_pages(obj);
> +	if (ret)
> +		return ret;
> +
>  	/* create a new object */
>  	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>  	if (!phys)
>  		return -ENOMEM;
>  
> -	vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> -	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	mapping = file_inode(obj->base.filp)->i_mapping;
> -	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -		struct page *page;
> -		char *src;
> -
> -		page = shmem_read_mapping_page(mapping, i);
> -		if (IS_ERR(page))
> -			return PTR_ERR(page);
> -
> -		src = kmap_atomic(page);
> -		memcpy(vaddr, src, PAGE_SIZE);
> -		kunmap_atomic(src);
> -
> -		mark_page_accessed(page);
> -		page_cache_release(page);
> -
> -		vaddr += PAGE_SIZE;
> -	}
> -
>  	obj->phys_handle = phys;
> -	return 0;
> +	obj->ops = &i915_gem_phys_ops;
> +
> +	return i915_gem_object_get_pages(obj);
>  }
>  
>  static int
> @@ -321,6 +390,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  			return -EFAULT;
>  	}
>  
> +	drm_clflush_virt_range(vaddr, args->size);

And need to flush again. Check.

The patch looks sane enough to me, save for the phys_handle leak.

>  	i915_gem_chipset_flush(dev);
>  	return 0;
>  }
> @@ -1184,11 +1254,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	 * pread/pwrite currently are reading and writing from the CPU
>  	 * perspective, requiring manual detiling by the client.
>  	 */
> -	if (obj->phys_handle) {
> -		ret = i915_gem_phys_pwrite(obj, args, file);
> -		goto out;
> -	}
> -
>  	if (obj->tiling_mode == I915_TILING_NONE &&
>  	    (obj->base.filp == NULL ||
>  	     (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> @@ -1199,8 +1264,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  		 * textures). Fallback to the shmem path in that case. */
>  	}
>  
> -	if (ret == -EFAULT || ret == -ENOSPC)
> -		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	if (ret == -EFAULT || ret == -ENOSPC) {
> +		if (obj->phys_handle)
> +			ret = i915_gem_phys_pwrite(obj, args, file);
> +		else
> +			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	}
>  
>  out:
>  	drm_gem_object_unreference(&obj->base);
> @@ -3658,7 +3727,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * Stolen memory is always coherent with the GPU as it is explicitly
>  	 * marked as wc by the system, or the system is cache-coherent.
>  	 */
> -	if (obj->stolen)
> +	if (obj->stolen || obj->phys_handle)
>  		return false;
>  
>  	/* If the GPU is snooping the contents of the CPU cache,
> @@ -4518,7 +4587,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
>  	i915_gem_object_release_stolen(obj);
> -	i915_gem_object_detach_phys(obj);
>  
>  	BUG_ON(obj->pages);
>  
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index beee13a53fbb..cf1425c9f54e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT     	 	 27
>  #define I915_PARAM_CMD_PARSER_VERSION	 28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> -- 
> 1.9.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC



More information about the Intel-gfx mailing list