[Intel-gfx] [PATCH] drm/i915: GFDT support for SNB/IVB

ville.syrjala at linux.intel.com ville.syrjala at linux.intel.com
Fri Mar 1 19:32:57 CET 2013


From: Ville Syrjälä <ville.syrjala at linux.intel.com>

Currently all scanout buffers must be uncached because the
display controller doesn't snoop the LLC. SNB introduced another
method to guarantee coherency for the display controller. It's
called the GFDT or graphics data type.

Pages that have the GFDT bit enabled in their PTEs get flushed
all the way to memory when a MI_FLUSH_DW or PIPE_CONTROL is
issued with the "synchronize GFDT" bit set.

So rather than making all scanout buffers uncached, set the GFDT
bit in their PTEs, and modify the ring flush functions to enable
the "synchronize GFDT" bit.

On HSW the GFDT bit was removed from the PTE, and it's only present in
surface state, so we can't really set it from the kernel. Also the docs
state that the hardware isn't actually guaranteed to respect the GFDT
bit. So it looks like GFDT isn't all that useful on HSW.

So far I've tried this very quickly on an IVB machine, and
it seems to be working as advertised. No idea if it does any
good though.

TODO:
- make sure there are no missing flushes (CPU access doesn't
  respect GFDT after all).
- measure it and see if there's some real benefit
- maybe we can track whether "synchronize GFDT" needs to be
  issued, and skip it when possible. needs some numbers to
  determine if it's worthwile.

Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  18 +++--
 drivers/gpu/drm/i915/i915_gem.c            | 109 ++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_context.c    |   3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  10 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  38 ++++++----
 drivers/gpu/drm/i915/i915_reg.h            |   2 +
 drivers/gpu/drm/i915/intel_display.c       |   4 +-
 drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  25 +++++++
 9 files changed, 167 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e95337c..537b344 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -408,7 +408,8 @@ struct i915_gtt {
 	void (*gtt_insert_entries)(struct drm_device *dev,
 				   struct sg_table *st,
 				   unsigned int pg_start,
-				   enum i915_cache_level cache_level);
+				   enum i915_cache_level cache_level,
+				   bool gfdt);
 };
 #define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
 
@@ -429,7 +430,8 @@ struct i915_hw_ppgtt {
 	void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
 			       struct sg_table *st,
 			       unsigned int pg_start,
-			       enum i915_cache_level cache_level);
+			       enum i915_cache_level cache_level,
+			       bool gfdt);
 	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
 };
 
@@ -1169,6 +1171,7 @@ struct drm_i915_gem_object {
 	unsigned int fenced_gpu_access:1;
 
 	unsigned int cache_level:2;
+	unsigned int gfdt:1;
 
 	unsigned int has_aliasing_ppgtt_mapping:1;
 	unsigned int has_global_gtt_mapping:1;
@@ -1310,6 +1313,10 @@ struct drm_i915_file_private {
 #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
+/* Only SNB and IVB have GFDT in PTEs */
+#define HAS_GFDT(dev)		(HAS_LLC(dev) && !IS_HASWELL(dev) && \
+				 (IS_GEN6(dev) || IS_GEN7(dev)))
+
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
 #define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >=6 && !IS_VALLEYVIEW(dev))
 
@@ -1639,7 +1646,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
 int __must_check
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
 int __must_check
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_device *dev,
+				     struct drm_i915_gem_object *obj,
 				     u32 alignment,
 				     struct intel_ring_buffer *pipelined);
 int i915_gem_attach_phys_object(struct drm_device *dev,
@@ -1681,14 +1689,14 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
-			    enum i915_cache_level cache_level);
+			    enum i915_cache_level cache_level, bool gfdt);
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 			      struct drm_i915_gem_object *obj);
 
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
-				enum i915_cache_level cache_level);
+			      enum i915_cache_level cache_level, bool gfdt);
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
 void i915_gem_init_global_gtt(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8413ffc..c635430 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3035,8 +3035,13 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 	 * and flushes when moving into and out of the RENDER domain, correct
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
+	 *
+	 * Also flush all pinned objects with GFDT enabled. Such objects
+	 * may be used for scanout, and the CPU doesn't know anything
+	 * about GFDT.
 	 */
-	if (obj->cache_level != I915_CACHE_NONE)
+	if (obj->cache_level != I915_CACHE_NONE &&
+	    !(obj->gfdt && obj->pin_count))
 		return;
 
 	trace_i915_gem_object_clflush(obj);
@@ -3154,6 +3159,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
+	bool gfdt = obj->gfdt;
+
+	/* Clear GFDT when moving to uncached */
+	if (cache_level == I915_CACHE_NONE)
+		gfdt = false;
 
 	if (obj->cache_level == cache_level)
 		return 0;
@@ -3187,10 +3197,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		}
 
 		if (obj->has_global_gtt_mapping)
-			i915_gem_gtt_bind_object(obj, cache_level);
+			i915_gem_gtt_bind_object(obj, cache_level, gfdt);
 		if (obj->has_aliasing_ppgtt_mapping)
 			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-					       obj, cache_level);
+					       obj, cache_level, gfdt);
 
 		obj->gtt_space->color = cache_level;
 	}
@@ -3219,6 +3229,65 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	}
 
 	obj->cache_level = cache_level;
+	obj->gfdt = gfdt;
+	i915_gem_verify_gtt(dev);
+	return 0;
+}
+
+static int i915_gem_object_set_gfdt(struct drm_i915_gem_object *obj,
+				    bool gfdt)
+{
+	struct drm_device *dev = obj->base.dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	int ret;
+
+	if (!HAS_GFDT(dev))
+		return -ENODEV;
+
+	if (obj->gfdt == gfdt)
+		return 0;
+
+	/* no point in setting GFDT on uncached object */
+	if (obj->cache_level == I915_CACHE_NONE)
+		return -EINVAL;
+
+	if (obj->gtt_space) {
+		ret = i915_gem_object_finish_gpu(obj);
+		if (ret)
+			return ret;
+
+		i915_gem_object_finish_gtt(obj);
+
+		if (obj->has_global_gtt_mapping)
+			i915_gem_gtt_bind_object(obj, obj->cache_level, gfdt);
+		if (obj->has_aliasing_ppgtt_mapping)
+			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+					       obj, obj->cache_level, gfdt);
+	}
+
+	if (gfdt) {
+		u32 old_read_domains, old_write_domain;
+
+		/*
+		 * Since we're just now enabling GFDT there's most
+		 * likely dirty data in the LLC. It must be flushed
+		 * to memory the old fashined way.
+		 */
+		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
+		WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+
+		old_read_domains = obj->base.read_domains;
+		old_write_domain = obj->base.write_domain;
+
+		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+		trace_i915_gem_object_change_domain(obj,
+						    old_read_domains,
+						    old_write_domain);
+	}
+
+	obj->gfdt = gfdt;
 	i915_gem_verify_gtt(dev);
 	return 0;
 }
@@ -3291,7 +3360,8 @@ unlock:
  * any flushes to be pipelined (for pageflips).
  */
 int
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_device *dev,
+				     struct drm_i915_gem_object *obj,
 				     u32 alignment,
 				     struct intel_ring_buffer *pipelined)
 {
@@ -3304,18 +3374,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 			return ret;
 	}
 
-	/* The display engine is not coherent with the LLC cache on gen6.  As
-	 * a result, we make sure that the pinning that is about to occur is
-	 * done with uncached PTEs. This is lowest common denominator for all
-	 * chipsets.
-	 *
-	 * However for gen6+, we could do better by using the GFDT bit instead
-	 * of uncaching, which would allow us to flush all the LLC-cached data
-	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+	/*
+	 * Try to set the GFDT bit instead of uncaching. This allow us to flush
+	 * all the LLC-cached data with that bit in the PTE to main memory with
+	 * just one PIPE_CONTROL.
 	 */
-	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
-	if (ret)
-		return ret;
+	ret = i915_gem_object_set_gfdt(obj, true);
+	if (ret) {
+		/*
+		 * The display engine is not coherent with the LLC cache on gen6.  As
+		 * a result, we make sure that the pinning that is about to occur is
+		 * done with uncached PTEs. This is lowest common denominator for all
+		 * chipsets.
+		 */
+		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+		if (ret)
+			return ret;
+	}
 
 	/* As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
@@ -3499,11 +3574,11 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 			return ret;
 
 		if (!dev_priv->mm.aliasing_ppgtt)
-			i915_gem_gtt_bind_object(obj, obj->cache_level);
+			i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
 	}
 
 	if (!obj->has_global_gtt_mapping && map_and_fenceable)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+		i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
 
 	obj->pin_count++;
 	obj->pin_mappable |= map_and_fenceable;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 21177d9..7244c0c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -382,7 +382,8 @@ static int do_switch(struct i915_hw_context *to)
 	}
 
 	if (!to->obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
+		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level,
+					 to->obj->gfdt);
 
 	if (!to->is_initialized || is_default_context(to))
 		hw_flags |= MI_RESTORE_INHIBIT;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2f2daeb..28a3c42 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -197,7 +197,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 	    !target_i915_obj->has_global_gtt_mapping)) {
 		i915_gem_gtt_bind_object(target_i915_obj,
-					 target_i915_obj->cache_level);
+					 target_i915_obj->cache_level,
+					 target_i915_obj->gfdt);
 	}
 
 	/* Validate that the target is in a valid r/w GPU domain */
@@ -432,7 +433,7 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 	/* Ensure ppgtt mapping exists if needed */
 	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
 		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-				       obj, obj->cache_level);
+				       obj, obj->cache_level, obj->gfdt);
 
 		obj->has_aliasing_ppgtt_mapping = 1;
 	}
@@ -449,7 +450,7 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 
 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
 	    !obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+		i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
 
 	return 0;
 }
@@ -1011,7 +1012,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	 * hsw should have this fixed, but let's be paranoid and do it
 	 * unconditionally for now. */
 	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
+		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level,
+					 batch_obj->gfdt);
 
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 926a1e2..3c7e48b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -42,15 +42,20 @@ typedef uint32_t gtt_pte_t;
 #define HSW_PTE_UNCACHED		(0)
 #define GEN6_PTE_CACHE_LLC		(2 << 1)
 #define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
+#define GEN6_PTE_GFDT			(1 << 3)
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 
 static inline gtt_pte_t gen6_pte_encode(struct drm_device *dev,
 					dma_addr_t addr,
-					enum i915_cache_level level)
+					enum i915_cache_level level,
+					bool gfdt)
 {
 	gtt_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
 
+	if (gfdt && HAS_GFDT(dev))
+		pte |= GEN6_PTE_GFDT;
+
 	switch (level) {
 	case I915_CACHE_LLC_MLC:
 		/* Haswell doesn't set L3 this way */
@@ -89,7 +94,7 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 
 	scratch_pte = gen6_pte_encode(ppgtt->dev,
 				      ppgtt->scratch_page_dma_addr,
-				      I915_CACHE_LLC);
+				      I915_CACHE_LLC, false);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -112,7 +117,8 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 				      struct sg_table *pages,
 				      unsigned first_entry,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level,
+				      bool gfdt)
 {
 	gtt_pte_t *pt_vaddr;
 	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
@@ -133,7 +139,7 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
 			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
 			pt_vaddr[j] = gen6_pte_encode(ppgtt->dev, page_addr,
-						      cache_level);
+						      cache_level, gfdt);
 
 			/* grab the next page */
 			if (++m == segment_len) {
@@ -279,11 +285,12 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
-			    enum i915_cache_level cache_level)
+			    enum i915_cache_level cache_level,
+			    bool gfdt)
 {
 	ppgtt->insert_entries(ppgtt, obj->pages,
 			      obj->gtt_space->start >> PAGE_SHIFT,
-			      cache_level);
+			      cache_level, gfdt);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
@@ -401,7 +408,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
 		i915_gem_clflush_object(obj);
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+		i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
 	}
 
 	i915_gem_chipset_flush(dev);
@@ -429,7 +436,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 static void gen6_ggtt_insert_entries(struct drm_device *dev,
 				     struct sg_table *st,
 				     unsigned int first_entry,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level,
+				     bool gfdt)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct scatterlist *sg = st->sgl;
@@ -443,7 +451,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 		len = sg_dma_len(sg) >> PAGE_SHIFT;
 		for (m = 0; m < len; m++) {
 			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			iowrite32(gen6_pte_encode(dev, addr, level),
+			iowrite32(gen6_pte_encode(dev, addr, level, gfdt),
 				  &gtt_entries[i]);
 			i++;
 		}
@@ -457,7 +465,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 	 */
 	if (i != 0)
 		WARN_ON(readl(&gtt_entries[i-1])
-			!= gen6_pte_encode(dev, addr, level));
+			!= gen6_pte_encode(dev, addr, level, gfdt));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -483,7 +491,7 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 		num_entries = max_entries;
 
 	scratch_pte = gen6_pte_encode(dev, dev_priv->gtt.scratch_page_dma,
-				      I915_CACHE_LLC);
+				      I915_CACHE_LLC, false);
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
@@ -493,7 +501,8 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 static void i915_ggtt_insert_entries(struct drm_device *dev,
 				     struct sg_table *st,
 				     unsigned int pg_start,
-				     enum i915_cache_level cache_level)
+				     enum i915_cache_level cache_level,
+				     bool gfdt)
 {
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
@@ -511,14 +520,15 @@ static void i915_ggtt_clear_range(struct drm_device *dev,
 
 
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
-			      enum i915_cache_level cache_level)
+			      enum i915_cache_level cache_level,
+			      bool gfdt)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
 					 obj->gtt_space->start >> PAGE_SHIFT,
-					 cache_level);
+					 cache_level, gfdt);
 
 	obj->has_global_gtt_mapping = 1;
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index cd226c2..6aa29d5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -242,6 +242,7 @@
 #define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
 #define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
 #define   MI_INVALIDATE_TLB		(1<<18)
+#define   MI_SYNCHRONIZE_GFDT		(1<<17)
 #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
 #define   MI_INVALIDATE_BSD		(1<<7)
 #define   MI_FLUSH_DW_USE_GTT		(1<<2)
@@ -311,6 +312,7 @@
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
+#define   PIPE_CONTROL_SYNCHRONIZE_GFDT			(1<<17)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
 #define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d912306..fc99d55 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1968,7 +1968,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 	}
 
 	dev_priv->mm.interruptible = false;
-	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
+	ret = i915_gem_object_pin_to_display_plane(dev, obj, alignment, pipelined);
 	if (ret)
 		goto err_interruptible;
 
@@ -6340,7 +6340,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 			goto fail_locked;
 		}
 
-		ret = i915_gem_object_pin_to_display_plane(obj, 0, NULL);
+		ret = i915_gem_object_pin_to_display_plane(dev, obj, 0, NULL);
 		if (ret) {
 			DRM_ERROR("failed to move cursor bo into the GTT\n");
 			goto fail_locked;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 67a2501..b4110a2 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -695,7 +695,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
+	ret = i915_gem_object_pin_to_display_plane(dev, new_bo, 0, NULL);
 	if (ret != 0)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1d5d613..8406247 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -234,6 +234,12 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		 * when the render cache is indeed flushed.
 		 */
 		flags |= PIPE_CONTROL_CS_STALL;
+
+		/* Flush GFDT out to memory */
+		if (HAS_GFDT(ring->dev)) {
+			flags |= PIPE_CONTROL_QW_WRITE;
+			flags |= PIPE_CONTROL_SYNCHRONIZE_GFDT;
+		}
 	}
 	if (invalidate_domains) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -306,6 +312,13 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
 	if (flush_domains) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+
+		/* Flush GFDT out to memory */
+		if (HAS_GFDT(ring->dev)) {
+			flags |= PIPE_CONTROL_QW_WRITE;
+			flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+			flags |= PIPE_CONTROL_SYNCHRONIZE_GFDT;
+		}
 	}
 	if (invalidate_domains) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -1557,6 +1570,12 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+
+	/* Flush GFDT out to memory */
+	if (flush & I915_GEM_GPU_DOMAINS && HAS_GFDT(ring->dev))
+		cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW |
+			MI_SYNCHRONIZE_GFDT;
+
 	/*
 	 * Bspec vol 1c.5 - video engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
@@ -1629,6 +1648,12 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+
+	/* Flush GFDT out to memory */
+	if (flush & I915_GEM_DOMAIN_RENDER && HAS_GFDT(ring->dev))
+		cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW |
+			MI_SYNCHRONIZE_GFDT;
+
 	/*
 	 * Bspec vol 1c.3 - blitter engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
-- 
1.7.12.4




More information about the Intel-gfx mailing list