[Intel-gfx] [PATCH] drm/i915/vlv: Added write-enable pte bit support
Goel, Akash
akash.goel at intel.com
Thu Feb 6 11:22:28 CET 2014
Please kindly review this patch.
Best regards
Akash
-----Original Message-----
From: Goel, Akash
Sent: Thursday, January 09, 2014 5:55 PM
To: intel-gfx at lists.freedesktop.org
Cc: Goel, Akash
Subject: [PATCH] drm/i915/vlv: Added write-enable pte bit support
From: Akash Goel <akash.goel at intel.com>
This adds support for using the write-enable bit in the GTT entry for VLV.
This is handled via a read-only flag in the GEM buffer object, which is checked when writing the GTT entries to decide whether the write-enable bit has to be set.
Currently, by default, only the batch buffer and the ring buffers are marked as read-only.
Signed-off-by: Akash Goel <akash.goel at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 10 ++++++-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++++++++
drivers/gpu/drm/i915/i915_gem_gtt.c | 45 ++++++++++++++++++++++--------
drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++
4 files changed, 57 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cc8afff..a3ab8a1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -620,7 +620,8 @@ struct i915_address_space {
void (*insert_entries)(struct i915_address_space *vm,
struct sg_table *st,
unsigned int first_entry,
- enum i915_cache_level cache_level);
+ enum i915_cache_level cache_level,
+ bool gt_ro);
void (*cleanup)(struct i915_address_space *vm);
};
@@ -1671,6 +1672,13 @@ struct drm_i915_gem_object {
unsigned int pin_display:1;
/*
+ * Is the object to be mapped as read-only to the GPU
+ * Only honoured if hardware has relevant pte bit
+ */
+ unsigned long gt_ro:1;
+ unsigned long gt_old_ro:1;
+
+ /*
* Is the GPU currently using a fence to access this buffer,
*/
unsigned int pending_fenced_gpu_access:1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bbff8f9..3a15aec 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -164,6 +164,14 @@ eb_lookup_vmas(struct eb_vmas *eb,
list_add_tail(&vma->exec_list, &eb->vmas);
list_del_init(&obj->obj_exec_link);
+ /*
+ * Currently mark each buffer as r/w by default.
+ * If we are changing gt_ro, we need to make sure that it
+ * gets re-mapped on gtt to update the entries.
+ */
+ obj->gt_old_ro = obj->gt_ro;
+ obj->gt_ro = 0;
+
vma->exec_entry = &exec[i];
if (eb->and < 0) {
eb->lut[i] = vma;
@@ -1153,6 +1161,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* take note of the batch buffer before we might reorder the lists */
batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;
+ /* Mark exec buffers as read-only from GPU side by default */
+ batch_obj->gt_ro = 1;
+
/* Move the objects en-masse into the GTT, evicting if necessary. */
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 998f9a0..d87add4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -290,7 +290,8 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
struct sg_table *pages,
unsigned first_entry,
- enum i915_cache_level cache_level)
+ enum i915_cache_level cache_level,
+ bool unused)
{
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
@@ -792,11 +793,13 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct sg_table *pages,
unsigned first_entry,
- enum i915_cache_level cache_level)
+ enum i915_cache_level cache_level,
+ bool gt_ro)
{
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen6_gtt_pte_t *pt_vaddr;
+ gen6_gtt_pte_t pte;
unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
struct sg_page_iter sg_iter;
@@ -806,7 +809,16 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
dma_addr_t page_addr;
page_addr = sg_page_iter_dma_address(&sg_iter);
- pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level, true);
+ pte = vm->pte_encode(page_addr, cache_level, true);
+ if (IS_VALLEYVIEW(vm->dev)) {
+ /* Handle read-only request */
+ if (gt_ro)
+ pte &= ~BYT_PTE_WRITEABLE;
+ else
+ pte |= BYT_PTE_WRITEABLE;
+ }
+ pt_vaddr[act_pte] = pte;
+
if (++act_pte == I915_PPGTT_PT_ENTRIES) {
kunmap_atomic(pt_vaddr);
act_pt++;
@@ -996,7 +1008,7 @@ ppgtt_bind_vma(struct i915_vma *vma,
WARN_ON(flags);
- vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
+ vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level,
+vma->obj->gt_ro);
}
static void ppgtt_unbind_vma(struct i915_vma *vma)
@@ -1167,7 +1179,8 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
struct sg_table *st,
unsigned int first_entry,
- enum i915_cache_level level)
+ enum i915_cache_level level,
+ bool unused)
{
struct drm_i915_private *dev_priv = vm->dev->dev_private;
gen8_gtt_pte_t __iomem *gtt_entries =
@@ -1214,18 +1227,29 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
struct sg_table *st,
unsigned int first_entry,
- enum i915_cache_level level)
+ enum i915_cache_level level,
+ bool gt_ro)
{
struct drm_i915_private *dev_priv = vm->dev->dev_private;
gen6_gtt_pte_t __iomem *gtt_entries =
(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+ gen6_gtt_pte_t pte;
int i = 0;
struct sg_page_iter sg_iter;
dma_addr_t addr;
for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
addr = sg_page_iter_dma_address(&sg_iter);
- iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
+ pte = vm->pte_encode(addr, level, true);
+ if (IS_VALLEYVIEW(vm->dev)) {
+ /* Handle read-only request */
+ if (gt_ro)
+ pte &= ~BYT_PTE_WRITEABLE;
+ else
+ pte |= BYT_PTE_WRITEABLE;
+ }
+ iowrite32(pte, &gtt_entries[i]);
+
i++;
}
@@ -1236,8 +1260,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
* hardware should work, we must keep this posting read for paranoia.
*/
if (i != 0)
- WARN_ON(readl(&gtt_entries[i-1]) !=
- vm->pte_encode(addr, level, true));
+ WARN_ON(readl(&gtt_entries[i-1]) != pte);
/* This next bit makes the above posting read even more important. We
* want to flush the TLBs only after we're certain all the PTE updates
@@ -1350,7 +1373,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
if (!obj->has_global_gtt_mapping ||
(cache_level != obj->cache_level)) {
vma->vm->insert_entries(vma->vm, obj->pages, entry,
- cache_level);
+ cache_level, obj->gt_ro);
obj->has_global_gtt_mapping = 1;
}
}
@@ -1360,7 +1383,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
(cache_level != obj->cache_level))) {
struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
appgtt->base.insert_entries(&appgtt->base,
- vma->obj->pages, entry, cache_level);
+ vma->obj->pages, entry, cache_level, obj->gt_ro);
vma->obj->has_aliasing_ppgtt_mapping = 1;
}
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 442c9a6..d257bb3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1352,6 +1352,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
goto err_hws;
}
+ /* mark ring buffers as read-only from GPU side by default */
+ obj->gt_ro = 1;
+
ring->obj = obj;
ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
--
1.8.5.2
More information about the Intel-gfx
mailing list