[Intel-gfx] [PATCH v4] drm/i915: Added write-enable pte bit support

Imre Deak imre.deak at intel.com
Mon Jun 16 21:50:14 CEST 2014


Hi Akash,

On Mon, 2014-06-09 at 08:36 +0530, akash.goel at intel.com wrote:
> From: Akash Goel <akash.goel at intel.com>
> 
> This adds support for a write-enable bit in the entry of GTT.
> This is handled via a read-only flag in the GEM buffer object which
> is then used to see how to set the bit when writing the GTT entries.
> Currently by default the Batch buffer & Ring buffers are marked as read only.
> 
> v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris)
>     Fixed the issue of leaving 'gt_old_ro' as unused. (Chris)
> 
> v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel).
> 
> v4: Added a new 'flags' parameter to all the pte(gen6) encode & insert_entries functions,
>     in lieu of overloading the cache_level enum (Daniel).
> 
> Signed-off-by: Akash Goel <akash.goel at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  6 +++++
>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 48 ++++++++++++++++++++-------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h     |  5 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  3 +++
>  4 files changed, 41 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 506386e..dc18aee 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1658,6 +1658,12 @@ struct drm_i915_gem_object {
>  	unsigned int pin_display:1;
>  
>  	/*
> +	 * Is the object to be mapped as read-only to the GPU
> +	 * Only honoured if hardware has relevant pte bit
> +	 */
> +	unsigned long gt_ro:1;
> +
> +	/*
>  	 * Is the GPU currently using a fence to access this buffer,
>  	 */
>  	unsigned int pending_fenced_gpu_access:1;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8b3cde7..221ea49 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -110,7 +110,7 @@ static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
>  
>  static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid)
> +				     bool valid, u32 unused)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
> @@ -132,7 +132,7 @@ static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
>  
>  static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid)
> +				     bool valid, u32 unused)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
> @@ -156,7 +156,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
>  
>  static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid)
> +				     bool valid, u32 flags)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
> @@ -164,7 +164,8 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
>  	/* Mark the page as writeable.  Other platforms don't have a
>  	 * setting for read-only/writable, so this matches that behavior.
>  	 */
> -	pte |= BYT_PTE_WRITEABLE;
> +	if (!(flags & PTE_READ_ONLY))
> +		pte |= BYT_PTE_WRITEABLE;
>  
>  	if (level != I915_CACHE_NONE)
>  		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
> @@ -174,7 +175,7 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
>  
>  static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid)
> +				     bool valid, u32 unused)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= HSW_PTE_ADDR_ENCODE(addr);
> @@ -187,7 +188,7 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
>  
>  static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
>  				      enum i915_cache_level level,
> -				      bool valid)
> +				      bool valid, u32 unused)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= HSW_PTE_ADDR_ENCODE(addr);
> @@ -301,7 +302,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>  				      struct sg_table *pages,
>  				      uint64_t start,
> -				      enum i915_cache_level cache_level)
> +				      enum i915_cache_level cache_level, u32 unused)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
> @@ -639,7 +640,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  	uint32_t pd_entry;
>  	int pte, pde;
>  
> -	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
> +	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
>  
>  	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
>  		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
> @@ -941,7 +942,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
>  	unsigned last_pte, i;
>  
> -	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
> +	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
>  
>  	while (num_entries) {
>  		last_pte = first_pte + num_entries;
> @@ -964,7 +965,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  				      struct sg_table *pages,
>  				      uint64_t start,
> -				      enum i915_cache_level cache_level)
> +				      enum i915_cache_level cache_level, u32 flags)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
> @@ -981,7 +982,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  
>  		pt_vaddr[act_pte] =
>  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> -				       cache_level, true);
> +				       cache_level, true, flags);
> +
>  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
>  			kunmap_atomic(pt_vaddr);
>  			pt_vaddr = NULL;
> @@ -1218,8 +1220,12 @@ ppgtt_bind_vma(struct i915_vma *vma,
>  	       enum i915_cache_level cache_level,
>  	       u32 flags)
>  {
> +	if (IS_VALLEYVIEW(vma->vm->dev))

The above check is redundant, since with the vlv/byt specific
pte_encode() we'll get already the right behavior.

> +		if (vma->obj->gt_ro)
> +			flags |= PTE_READ_ONLY;
> +
>  	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
> -				cache_level);
> +				cache_level, flags);
>  }
>  
>  static void ppgtt_unbind_vma(struct i915_vma *vma)
> @@ -1394,7 +1400,7 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
>  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  				     struct sg_table *st,
>  				     uint64_t start,
> -				     enum i915_cache_level level)
> +				     enum i915_cache_level level, u32 unused)
>  {
>  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
>  	unsigned first_entry = start >> PAGE_SHIFT;
> @@ -1440,7 +1446,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  				     struct sg_table *st,
>  				     uint64_t start,
> -				     enum i915_cache_level level)
> +				     enum i915_cache_level level, u32 flags)
>  {
>  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
>  	unsigned first_entry = start >> PAGE_SHIFT;
> @@ -1452,7 +1458,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  
>  	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
>  		addr = sg_page_iter_dma_address(&sg_iter);
> -		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
> +		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
>  		i++;
>  	}
>  
> @@ -1464,7 +1470,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  	 */
>  	if (i != 0)
>  		WARN_ON(readl(&gtt_entries[i-1]) !=
> -			vm->pte_encode(addr, level, true));
> +			vm->pte_encode(addr, level, true, flags));
>  
>  	/* This next bit makes the above posting read even more important. We
>  	 * want to flush the TLBs only after we're certain all the PTE updates
> @@ -1518,7 +1524,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
>  		 first_entry, num_entries, max_entries))
>  		num_entries = max_entries;
>  
> -	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
> +	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
>  
>  	for (i = 0; i < num_entries; i++)
>  		iowrite32(scratch_pte, &gtt_base[i]);
> @@ -1567,6 +1573,10 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_object *obj = vma->obj;
>  
> +	if (IS_VALLEYVIEW(vma->vm->dev))

The check is redundant as above.

> +		if (obj->gt_ro)
> +			flags |= PTE_READ_ONLY;
> +
>  	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
>  	 * or we have a global mapping already but the cacheability flags have
>  	 * changed, set the global PTEs.
> @@ -1583,7 +1593,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  		    (cache_level != obj->cache_level)) {
>  			vma->vm->insert_entries(vma->vm, obj->pages,
>  						vma->node.start,
> -						cache_level);
> +						cache_level, flags);
>  			obj->has_global_gtt_mapping = 1;
>  		}
>  	}
> @@ -1595,7 +1605,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  		appgtt->base.insert_entries(&appgtt->base,
>  					    vma->obj->pages,
>  					    vma->node.start,
> -					    cache_level);
> +					    cache_level, flags);
>  		vma->obj->has_aliasing_ppgtt_mapping = 1;
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 1b96a06..674eaf5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -197,15 +197,16 @@ struct i915_address_space {
>  	/* FIXME: Need a more generic return type */
>  	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid); /* Create a valid PTE */
> +				     bool valid, u32 flags); /* Create a valid PTE */
>  	void (*clear_range)(struct i915_address_space *vm,
>  			    uint64_t start,
>  			    uint64_t length,
>  			    bool use_scratch);
> +#define PTE_READ_ONLY (1<<2)

As this uses the same namespace as GLOBAL_BIND, I would keep their
definitions together. Also I couldn't find any other flag besides
GLOBAL_BIND, in which case this could be (1 << 1).

With the above fixed the patch looks good to me:
Reviewed-by: Imre Deak <imre.deak at intel.com>

>  	void (*insert_entries)(struct i915_address_space *vm,
>  			       struct sg_table *st,
>  			       uint64_t start,
> -			       enum i915_cache_level cache_level);
> +			       enum i915_cache_level cache_level, u32 flags);
>  	void (*cleanup)(struct i915_address_space *vm);
>  };
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 279488a..ee99971 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1397,6 +1397,9 @@ static int allocate_ring_buffer(struct intel_engine_cs *ring)
>  	if (obj == NULL)
>  		return -ENOMEM;
>  
> +	/* mark ring buffers as read-only from GPU side by default */
> +	obj->gt_ro = 1;
> +
>  	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
>  	if (ret)
>  		goto err_unref;





More information about the Intel-gfx mailing list