[Intel-gfx] [PATCH v2] drm/i915: Added write-enable pte bit support

akash.goel at intel.com akash.goel at intel.com
Sat Feb 8 11:31:07 CET 2014


From: Akash Goel <akash.goel at intel.com>

This adds support for a write-enable bit in the GTT entries.
This is handled via a read-only flag in the GEM buffer object which
is then used to decide how to set the bit when writing the GTT entries.
Currently, by default, the batch buffer and ring buffers are marked as read-only.

v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris)
    Fixed the issue of leaving 'gt_old_ro' as unused. (Chris)

Signed-off-by: Akash Goel <akash.goel at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  7 +++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 +++++++++++-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 14 ++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  3 +++
 4 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 728b9c3..7f98a2c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1699,6 +1699,13 @@ struct drm_i915_gem_object {
 	unsigned int pin_display:1;
 
 	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned long gt_ro:1;
+	unsigned long gt_old_ro:1;
+
+	/*
 	 * Is the GPU currently using a fence to access this buffer,
 	 */
 	unsigned int pending_fenced_gpu_access:1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 032def9..bb8e0bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -164,6 +164,12 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		list_add_tail(&vma->exec_list, &eb->vmas);
 		list_del_init(&obj->obj_exec_link);
 
+		/* Mark each buffer as r/w by default
+		 * If we are changing gt_ro, we need to make sure that it
+		 * gets re-mapped on gtt to update the ptes */
+		obj->gt_old_ro = obj->gt_ro;
+		obj->gt_ro = 0;
+
 		vma->exec_entry = &exec[i];
 		if (eb->and < 0) {
 			eb->lut[i] = vma;
@@ -670,7 +676,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 
 			if ((entry->alignment &&
 			     vma->node.start & (entry->alignment - 1)) ||
-			    (need_mappable && !obj->map_and_fenceable))
+			    (need_mappable && !obj->map_and_fenceable) ||
+			    ((obj->gt_old_ro != obj->gt_ro) && (IS_VALLEYVIEW(vm->dev))))
 				ret = i915_vma_unbind(vma);
 			else
 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1153,6 +1160,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* take note of the batch buffer before we might reorder the lists */
 	batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;
 
+	/* Mark exec buffers as read-only from GPU side by default */
+	batch_obj->gt_ro = 1;
+
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6e858e1..1e02d44 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -151,6 +151,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
 
 #define BYT_PTE_WRITEABLE		(1 << 1)
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
+#define BYT_PTE_READ_ONLY		(1 << 31)
 
 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
@@ -167,6 +168,10 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 	if (level != I915_CACHE_NONE)
 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 
+	/* Handle read-only request */
+	if (level & BYT_PTE_READ_ONLY)
+		pte &= ~BYT_PTE_WRITEABLE;
+
 	return pte;
 }
 
@@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		pt_vaddr[act_pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
 				       cache_level, true);
+
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
@@ -999,6 +1005,10 @@ ppgtt_bind_vma(struct i915_vma *vma,
 
 	WARN_ON(flags);
 
+	if (IS_VALLEYVIEW(vma->vm->dev))
+		if (vma->obj->gt_ro)
+			cache_level |= BYT_PTE_READ_ONLY;
+
 	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
 }
 
@@ -1336,6 +1346,10 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
+	if (IS_VALLEYVIEW(dev))
+		if (obj->gt_ro)
+			cache_level |= BYT_PTE_READ_ONLY;
+
 	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
 	 * or we have a global mapping already but the cacheability flags have
 	 * changed, set the global PTEs.
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d897a19..2fee914 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1354,6 +1354,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		goto err_hws;
 	}
 
+	/* mark ring buffers as read-only from GPU side by default */
+	obj->gt_ro = 1;
+
 	ring->obj = obj;
 
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
-- 
1.8.5.2




More information about the Intel-gfx mailing list