[Intel-gfx] sna: Experimental support for write-combining mmaps (wc-mmap)

Fri Nov 14 11:04:28 CET 2014

On Thu, Nov 13, 2014 at 04:28:46PM +0100, Sedat Dilek wrote:
> Hi,
> 
> what is the status of drm-intel-wc-mmap patchset (#2 + #3)?
> I have refreshed them on top of drm-intel-coherent-phys-gtt patch (#1).
> Playing with that againt Linux v3.18-rc4.

Waiting for the misssing testcases and remainig reviews. I tried to
volunteer Akash for that, but apparently failed.
-Daniel

> 
> Regards,
> - Sedat -

> From 20a70ef5865104f35bc8e4cd11ca8ae3b7e6051a Mon Sep 17 00:00:00 2001
> From: Chris Wilson <chris at chris-wilson.co.uk>
> Date: Tue, 4 Nov 2014 04:51:40 -0800
> Subject: [PATCH 1/3] drm/i915: Make the physical object coherent with GTT
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Currently objects for which the hardware needs a contiguous physical
> address are allocated a shadow backing storage to satisfy the contraint.
> This shadow buffer is not wired into the normal obj->pages and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object via either a GTT mmaping of or by rendering
> into the GEM bo. However, keeping the CPU mmap of the shmemfs backing
> storage coherent with the contiguous shadow is not yet possible.
> Fortuituously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
> 
> This allows the physical constraint of the GEM object to be transparent
> to userspace and allow it to efficiently render into or update them via
> the GTT and GPU.
> 
> v2: Fix leak of pci handle spotted by Ville
> v3: Remove the now duplicate call to detach_phys_object during free.
> v4: Wait for rendering before pwrite. As this patch makes it possible to
> render into the phys object, we should make it correct as well!
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Reviewed-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>  drivers/gpu/drm/i915/i915_drv.h |   6 +-
>  drivers/gpu/drm/i915/i915_gem.c | 207 +++++++++++++++++++++++++++-------------
>  include/uapi/drm/i915_drm.h     |   1 +
>  4 files changed, 150 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 1403b01..9b08853 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1027,6 +1027,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_CMD_PARSER_VERSION:
>  		value = i915_cmd_parser_get_version();
>  		break;
> +	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> +		value = 1;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 16a6f6d..0417784 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1864,10 +1864,10 @@ struct drm_i915_gem_object {
>  	unsigned long user_pin_count;
>  	struct drm_file *pin_filp;
>  
> -	/** for phy allocated objects */
> -	struct drm_dma_handle *phys_handle;
> -
>  	union {
> +		/** for phy allocated objects */
> +		struct drm_dma_handle *phys_handle;
> +
>  		struct i915_gem_userptr {
>  			uintptr_t ptr;
>  			unsigned read_only :1;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 28f91df..124ec85 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -208,40 +208,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  {
> -	drm_dma_handle_t *phys = obj->phys_handle;
> +	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +	char *vaddr = obj->phys_handle->vaddr;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	int i;
>  
> -	if (!phys)
> -		return;
> +	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +		struct page *page;
> +		char *src;
> +
> +		page = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +
> +		src = kmap_atomic(page);
> +		memcpy(vaddr, src, PAGE_SIZE);
> +		drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +		kunmap_atomic(src);
> +
> +		page_cache_release(page);
> +		vaddr += PAGE_SIZE;
> +	}
> +
> +	i915_gem_chipset_flush(obj->base.dev);
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return -ENOMEM;
> +
> +	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +		kfree(st);
> +		return -ENOMEM;
> +	}
> +
> +	sg = st->sgl;
> +	sg->offset = 0;
> +	sg->length = obj->base.size;
>  
> -	if (obj->madv == I915_MADV_WILLNEED) {
> +	sg_dma_address(sg) = obj->phys_handle->busaddr;
> +	sg_dma_len(sg) = obj->base.size;
> +
> +	obj->pages = st;
> +	obj->has_dma_mapping = true;
> +	return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> +	int ret;
> +
> +	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> +	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +	if (ret) {
> +		/* In the event of a disaster, abandon all caches and
> +		 * hope for the best.
> +		 */
> +		WARN_ON(ret != -EIO);
> +		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	}
> +
> +	if (obj->madv == I915_MADV_DONTNEED)
> +		obj->dirty = 0;
> +
> +	if (obj->dirty) {
>  		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> -		char *vaddr = phys->vaddr;
> +		char *vaddr = obj->phys_handle->vaddr;
>  		int i;
>  
>  		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -			struct page *page = shmem_read_mapping_page(mapping, i);
> -			if (!IS_ERR(page)) {
> -				char *dst = kmap_atomic(page);
> -				memcpy(dst, vaddr, PAGE_SIZE);
> -				drm_clflush_virt_range(dst, PAGE_SIZE);
> -				kunmap_atomic(dst);
> -
> -				set_page_dirty(page);
> +			struct page *page;
> +			char *dst;
> +
> +			page = shmem_read_mapping_page(mapping, i);
> +			if (IS_ERR(page))
> +				continue;
> +
> +			dst = kmap_atomic(page);
> +			drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +			memcpy(dst, vaddr, PAGE_SIZE);
> +			kunmap_atomic(dst);
> +
> +			set_page_dirty(page);
> +			if (obj->madv == I915_MADV_WILLNEED)
>  				mark_page_accessed(page);
> -				page_cache_release(page);
> -			}
> +			page_cache_release(page);
>  			vaddr += PAGE_SIZE;
>  		}
> -		i915_gem_chipset_flush(obj->base.dev);
> +		obj->dirty = 0;
>  	}
>  
> -#ifdef CONFIG_X86
> -	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	drm_pci_free(obj->base.dev, phys);
> -	obj->phys_handle = NULL;
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +
> +	obj->has_dma_mapping = false;
> +}
> +
> +static void
> +i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
> +{
> +	drm_pci_free(obj->base.dev, obj->phys_handle);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> +	.get_pages = i915_gem_object_get_pages_phys,
> +	.put_pages = i915_gem_object_put_pages_phys,
> +	.release = i915_gem_object_release_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma, *next;
> +	int ret;
> +
> +	drm_gem_object_reference(&obj->base);
> +	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> +		if (i915_vma_unbind(vma))
> +			break;
> +
> +	ret = i915_gem_object_put_pages(obj);
> +	drm_gem_object_unreference(&obj->base);
> +
> +	return ret;
>  }
>  
>  int
> @@ -249,9 +346,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  			    int align)
>  {
>  	drm_dma_handle_t *phys;
> -	struct address_space *mapping;
> -	char *vaddr;
> -	int i;
> +	int ret;
>  
>  	if (obj->phys_handle) {
>  		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -266,41 +361,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  	if (obj->base.filp == NULL)
>  		return -EINVAL;
>  
> +	ret = drop_pages(obj);
> +	if (ret)
> +		return ret;
> +
>  	/* create a new object */
>  	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>  	if (!phys)
>  		return -ENOMEM;
>  
> -	vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> -	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	mapping = file_inode(obj->base.filp)->i_mapping;
> -	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -		struct page *page;
> -		char *src;
> -
> -		page = shmem_read_mapping_page(mapping, i);
> -		if (IS_ERR(page)) {
> -#ifdef CONFIG_X86
> -			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -			drm_pci_free(obj->base.dev, phys);
> -			return PTR_ERR(page);
> -		}
> -
> -		src = kmap_atomic(page);
> -		memcpy(vaddr, src, PAGE_SIZE);
> -		kunmap_atomic(src);
> -
> -		mark_page_accessed(page);
> -		page_cache_release(page);
> -
> -		vaddr += PAGE_SIZE;
> -	}
> -
>  	obj->phys_handle = phys;
> -	return 0;
> +	obj->ops = &i915_gem_phys_ops;
> +
> +	return i915_gem_object_get_pages(obj);
>  }
>  
>  static int
> @@ -311,6 +384,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  	struct drm_device *dev = obj->base.dev;
>  	void *vaddr = obj->phys_handle->vaddr + args->offset;
>  	char __user *user_data = to_user_ptr(args->data_ptr);
> +	int ret;
> +
> +	/* We manually control the domain here and pretend that it
> +	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
> +	 */
> +	ret = i915_gem_object_wait_rendering(obj, false);
> +	if (ret)
> +		return ret;
>  
>  	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
>  		unsigned long unwritten;
> @@ -326,6 +407,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  			return -EFAULT;
>  	}
>  
> +	drm_clflush_virt_range(vaddr, args->size);
>  	i915_gem_chipset_flush(dev);
>  	return 0;
>  }
> @@ -1046,11 +1128,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	 * pread/pwrite currently are reading and writing from the CPU
>  	 * perspective, requiring manual detiling by the client.
>  	 */
> -	if (obj->phys_handle) {
> -		ret = i915_gem_phys_pwrite(obj, args, file);
> -		goto out;
> -	}
> -
>  	if (obj->tiling_mode == I915_TILING_NONE &&
>  	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
>  	    cpu_write_needs_clflush(obj)) {
> @@ -1060,8 +1137,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  		 * textures). Fallback to the shmem path in that case. */
>  	}
>  
> -	if (ret == -EFAULT || ret == -ENOSPC)
> -		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	if (ret == -EFAULT || ret == -ENOSPC) {
> +		if (obj->phys_handle)
> +			ret = i915_gem_phys_pwrite(obj, args, file);
> +		else
> +			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	}
>  
>  out:
>  	drm_gem_object_unreference(&obj->base);
> @@ -3560,7 +3641,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * Stolen memory is always coherent with the GPU as it is explicitly
>  	 * marked as wc by the system, or the system is cache-coherent.
>  	 */
> -	if (obj->stolen)
> +	if (obj->stolen || obj->phys_handle)
>  		return false;
>  
>  	/* If the GPU is snooping the contents of the CPU cache,
> @@ -4495,8 +4576,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  		}
>  	}
>  
> -	i915_gem_object_detach_phys(obj);
> -
>  	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
>  	 * before progressing. */
>  	if (obj->stolen)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ff57f07..c6b229f 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT     	 	 27
>  #define I915_PARAM_CMD_PARSER_VERSION	 28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> -- 
> 2.1.3
> 

> From ae9a40e0d04464796cc782d4531f386398e5266a Mon Sep 17 00:00:00 2001
> From: Chris Wilson <chris at chris-wilson.co.uk>
> Date: Mon, 13 Oct 2014 14:08:10 +0100
> Subject: [PATCH 2/3] drm/i915: Broaden application of set-domain(GTT)
> 
> Previously, this was restricted to only operate on bound objects - to
> make pointer access through the GTT to the object coherent with writes
> to and from the GPU. A second usecase is drm_intel_bo_wait_rendering()
> which at present does not function unless the object also happens to
> be bound into the GGTT (on current systems that is becoming increasingly
> rare, especially for the typical requests from mesa). A third usecase is
> a future patch wishing to extend the coverage of the GTT domain to
> include objects not bound into the GGTT but still in its coherent cache
> domain. For the latter pair of requests, we need to operate on the
> object regardless of its bind state.
> 
> v2: After discussion with Akash, we came to the conclusion that the
> get-pages was required in order for accurate domain tracking in the
> corner cases (like the shrinker) and also useful for ensuring memory
> coherency with earlier cached CPU mmaps in case userspace uses exotic
> cache bypass (non-temporal) instructions.
> 
> Cc: Akash Goel <akash.goel at intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> 
> Conflicts:
> 	drivers/gpu/drm/i915/i915_gem.c
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++++------------------
>  1 file changed, 21 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 124ec85..fe119e1 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1490,18 +1490,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  	if (ret)
>  		goto unref;
>  
> -	if (read_domains & I915_GEM_DOMAIN_GTT) {
> +	if (read_domains & I915_GEM_DOMAIN_GTT)
>  		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
> -
> -		/* Silently promote "you're not bound, there was nothing to do"
> -		 * to success, since the client was just asking us to
> -		 * make sure everything was done.
> -		 */
> -		if (ret == -EINVAL)
> -			ret = 0;
> -	} else {
> +	else
>  		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
> -	}
>  
>  unref:
>  	drm_gem_object_unreference(&obj->base);
> @@ -3722,15 +3714,10 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
>  int
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  {
> -	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
>  	uint32_t old_write_domain, old_read_domains;
> +	struct i915_vma *vma;
>  	int ret;
>  
> -	/* Not valid to be called on unbound objects. */
> -	if (vma == NULL)
> -		return -EINVAL;
> -
>  	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
>  		return 0;
>  
> @@ -3739,6 +3726,19 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  		return ret;
>  
>  	i915_gem_object_retire(obj);
> +
> +	/* Flush and acquire obj->pages so that we are coherent through
> +	 * direct access in memory with previous cached writes through
> +	 * shmemfs and that our cache domain tracking remains valid.
> +	 * For example, if the obj->filp was moved to swap without us
> +	 * being notified and releasing the pages, we would mistakenly
> +	 * continue to assume that the obj remained out of the CPU cached
> +	 * domain.
> +	 */
> +	ret = i915_gem_object_get_pages(obj);
> +	if (ret)
> +		return ret;
> +
>  	i915_gem_object_flush_cpu_write_domain(obj, false);
>  
>  	/* Serialise direct access to this object with the barriers for
> @@ -3770,9 +3770,12 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  					    old_write_domain);
>  
>  	/* And bump the LRU for this access */
> -	if (i915_gem_object_is_inactive(obj))
> +	vma = i915_gem_obj_to_ggtt(obj);
> +	if (vma &&
> +	    drm_mm_node_allocated(&vma->node) &&
> +	    i915_gem_object_is_inactive(obj))
>  		list_move_tail(&vma->mm_list,
> -			       &dev_priv->gtt.base.inactive_list);
> +			       &to_i915(obj->base.dev)->gtt.base.inactive_list);
>  
>  	return 0;
>  }
> -- 
> 2.1.3
> 

> From 4882abcb71b4982371a1aad038d7565887138ee5 Mon Sep 17 00:00:00 2001
> From: Akash Goel <akash.goel at intel.com>
> Date: Thu, 23 Oct 2014 17:55:47 +0100
> Subject: [PATCH 3/3] drm/i915: Support creation of unbound wc user mappings
>  for objects
> 
> This patch provides support to create write-combining virtual mappings of
> GEM object. It intends to provide the same funtionality of 'mmap_gtt'
> interface without the constraints and contention of a limited aperture
> space, but requires clients handles the linear to tile conversion on their
> own. This is for improving the CPU write operation performance, as with such
> mapping, writes and reads are almost 50% faster than with mmap_gtt. Similar
> to the GTT mmapping, unlike the regular CPU mmapping, it avoids the cache
> flush after update from CPU side, when object is passed onto GPU.  This
> type of mapping is specially useful in case of sub-region update,
> i.e. when only a portion of the object is to be updated. Using a CPU mmap
> in such cases would normally incur a clflush of the whole object, and
> using a GTT mmapping would likely require eviction of an active object or
> fence and thus stall. The write-combining CPU mmap avoids both.
> 
> To ensure the cache coherency, before using this mapping, the GTT domain
> has been reused here. This provides the required cache flush if the object
> is in CPU domain or synchronization against the concurrent rendering.
> Although the access through an uncached mmap should automatically
> invalidate the cache lines, this may not be true for non-temporal write
> instructions and also not all pages of the object may be updated at any
> given point of time through this mapping.  Having a call to get_pages in
> set_to_gtt_domain function, as added in the earlier patch 'drm/i915:
> Broaden application of set-domain(GTT)', would guarantee the clflush and
> so there will be no cachelines holding the data for the object before it
> is accessed through this map.
> 
> The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP_IOCTL) has been
> extended with a new flags field (defaulting to 0 for existent users). In
> order for userspace to detect the extended ioctl, a new parameter
> I915_PARAM_HAS_EXT_MMAP has been added for versioning the ioctl interface.
> 
> v2: Fix error handling, invalid flag detection, renaming (ickle)
> v3: Adapt to fit "drm/i915: Make the physical object coherent with GTT" (dileks)
> 
> The new mmapping is exercised by igt/gem_mmap_wc,
> igt/gem_concurrent_blit and igt/gem_gtt_speed.
> 
> Change-Id: Ie883942f9e689525f72fe9a8d3780c3a9faa769a
> Signed-off-by: Akash Goel <akash.goel at intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_dma.c |  3 +++
>  drivers/gpu/drm/i915/i915_gem.c | 19 +++++++++++++++++++
>  include/uapi/drm/i915_drm.h     |  9 +++++++++
>  3 files changed, 31 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 9b08853..c034cfc 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1030,6 +1030,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
>  		value = 1;
>  		break;
> +	case I915_PARAM_MMAP_VERSION:
> +		value = 1;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fe119e1..48d0bbc 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1548,6 +1548,12 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>  	struct drm_gem_object *obj;
>  	unsigned long addr;
>  
> +	if (args->flags & ~(I915_MMAP_WC))
> +		return -EINVAL;
> +
> +	if (args->flags & I915_MMAP_WC && !cpu_has_pat)
> +		return -ENODEV;
> +
>  	obj = drm_gem_object_lookup(dev, file, args->handle);
>  	if (obj == NULL)
>  		return -ENOENT;
> @@ -1563,6 +1569,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>  	addr = vm_mmap(obj->filp, 0, args->size,
>  		       PROT_READ | PROT_WRITE, MAP_SHARED,
>  		       args->offset);
> +	if (args->flags & I915_MMAP_WC) {
> +		struct mm_struct *mm = current->mm;
> +		struct vm_area_struct *vma;
> +
> +		down_write(&mm->mmap_sem);
> +		vma = find_vma(mm, addr);
> +		if (vma)
> +			vma->vm_page_prot =
> +				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
> +		else
> +			addr = -ENOMEM;
> +		up_write(&mm->mmap_sem);
> +	}
>  	drm_gem_object_unreference_unlocked(obj);
>  	if (IS_ERR((void *)addr))
>  		return addr;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index c6b229f..f7eaefa 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -341,6 +341,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_WT     	 	 27
>  #define I915_PARAM_CMD_PARSER_VERSION	 28
>  #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
> +#define I915_PARAM_MMAP_VERSION          30
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> @@ -488,6 +489,14 @@ struct drm_i915_gem_mmap {
>  	 * This is a fixed-size type for 32/64 compatibility.
>  	 */
>  	__u64 addr_ptr;
> +
> +	/**
> +	 * Flags for extended behaviour.
> +	 *
> +	 * Added in version 2.
> +	 */
> +	__u64 flags;
> +#define I915_MMAP_WC 0x1
>  };
>  
>  struct drm_i915_gem_mmap_gtt {
> -- 
> 2.1.3
> 

> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx


-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch