[Intel-gfx] [PATCH] drm/i915/vlv: Added write-enable pte bit support

Thu Jan 9 13:24:34 CET 2014

From: Akash Goel <akash.goel at intel.com>

This adds support for using the write-enable bit in the GTT entry for VLV.
This is handled via a read-only flag in the GEM buffer object which
is then used to check if the write-enable bit has to be set or not
when writing the GTT entries.
Currently by default only the Batch buffer & Ring buffers are being marked
as read only.

Signed-off-by: Akash Goel <akash.goel at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            | 10 ++++++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 45 ++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  3 ++
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cc8afff..a3ab8a1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -620,7 +620,8 @@ struct i915_address_space {
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       unsigned int first_entry,
-			       enum i915_cache_level cache_level);
+			       enum i915_cache_level cache_level,
+			       bool gt_ro);
 	void (*cleanup)(struct i915_address_space *vm);
 };
 
@@ -1671,6 +1672,13 @@ struct drm_i915_gem_object {
 	unsigned int pin_display:1;
 
 	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned long gt_ro:1;
+	unsigned long gt_old_ro:1;
+
+	/*
 	 * Is the GPU currently using a fence to access this buffer,
 	 */
 	unsigned int pending_fenced_gpu_access:1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bbff8f9..3a15aec 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -164,6 +164,14 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		list_add_tail(&vma->exec_list, &eb->vmas);
 		list_del_init(&obj->obj_exec_link);
 
+		/*
+		 * Currently mark each buffer as r/w by default.
+		 * If we are changing gt_ro, we need to make sure that it
+		 * gets re-mapped on gtt to update the entries.
+		 */
+		obj->gt_old_ro = obj->gt_ro;
+		obj->gt_ro = 0;
+
 		vma->exec_entry = &exec[i];
 		if (eb->and < 0) {
 			eb->lut[i] = vma;
@@ -1153,6 +1161,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* take note of the batch buffer before we might reorder the lists */
 	batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;
 
+	/* Mark exec buffers as read-only from GPU side by default */
+	batch_obj->gt_ro = 1;
+
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 998f9a0..d87add4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -290,7 +290,8 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      unsigned first_entry,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level,
+				      bool unused)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
@@ -792,11 +793,13 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      unsigned first_entry,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level,
+				      bool gt_ro)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
+	gen6_gtt_pte_t pte;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	struct sg_page_iter sg_iter;
@@ -806,7 +809,16 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		dma_addr_t page_addr;
 
 		page_addr = sg_page_iter_dma_address(&sg_iter);
-		pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level, true);
+		pte  = vm->pte_encode(page_addr, cache_level, true);
+		if (IS_VALLEYVIEW(vm->dev)) {
+			/* Handle read-only request */
+			if (gt_ro)
+				pte &= ~BYT_PTE_WRITEABLE;
+			else
+				pte |= BYT_PTE_WRITEABLE;
+		}
+		pt_vaddr[act_pte] = pte;
+
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			act_pt++;
@@ -996,7 +1008,7 @@ ppgtt_bind_vma(struct i915_vma *vma,
 
 	WARN_ON(flags);
 
-	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
+	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level, vma->obj->gt_ro);
 }
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
@@ -1167,7 +1179,8 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int first_entry,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level,
+				     bool unused)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen8_gtt_pte_t __iomem *gtt_entries =
@@ -1214,18 +1227,29 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int first_entry,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level,
+				     bool gt_ro)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen6_gtt_pte_t __iomem *gtt_entries =
 		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+	gen6_gtt_pte_t pte;
 	int i = 0;
 	struct sg_page_iter sg_iter;
 	dma_addr_t addr;
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
+		pte  = vm->pte_encode(addr, level, true);
+		if (IS_VALLEYVIEW(vm->dev)) {
+			/* Handle read-only request */
+			if (gt_ro)
+				pte &= ~BYT_PTE_WRITEABLE;
+			else
+				pte |= BYT_PTE_WRITEABLE;
+		}
+		iowrite32(pte, &gtt_entries[i]);
+
 		i++;
 	}
 
@@ -1236,8 +1260,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1]) !=
-			vm->pte_encode(addr, level, true));
+		WARN_ON(readl(&gtt_entries[i-1]) != pte);
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -1350,7 +1373,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 		if (!obj->has_global_gtt_mapping ||
 		    (cache_level != obj->cache_level)) {
 			vma->vm->insert_entries(vma->vm, obj->pages, entry,
-						cache_level);
+						cache_level, obj->gt_ro);
 			obj->has_global_gtt_mapping = 1;
 		}
 	}
@@ -1360,7 +1383,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	     (cache_level != obj->cache_level))) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base,
-					    vma->obj->pages, entry, cache_level);
+					    vma->obj->pages, entry, cache_level, obj->gt_ro);
 		vma->obj->has_aliasing_ppgtt_mapping = 1;
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 442c9a6..d257bb3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1352,6 +1352,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		goto err_hws;
 	}
 
+	/* mark ring buffers as read-only from GPU side by default */
+	obj->gt_ro = 1;
+
 	ring->obj = obj;
 
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
-- 
1.8.5.2