[Intel-gfx] [PATCH 110/190] drm/i915: Move vma->pin_count:4 to vma->flags

Chris Wilson chris at chris-wilson.co.uk
Mon Jan 11 02:44:54 PST 2016


Let's aid gcc in our pin_count tracking as
i915_vma_pin()/i915_vma_unpin() are some of the hottest of the hot
functions and gcc doesn't like bitfields that much!

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            | 20 +++++++--------
 drivers/gpu/drm/i915/i915_gem.c            | 27 ++++++++++++---------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 ++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h        | 39 +++++++++++++++++-------------
 4 files changed, 52 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0e3ff0b24d4d..a81e0f6de593 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2686,20 +2686,20 @@ i915_vma_pin(struct i915_vma *vma,
 	     uint64_t alignment,
 	     uint64_t flags);
 /* Flags used by pin/bind&friends. */
-#define PIN_MAPPABLE	(1<<0)
-#define PIN_NONBLOCK	(1<<1)
-#define PIN_GLOBAL	(1<<2)
-#define PIN_OFFSET_BIAS	(1<<3)
-#define PIN_USER	(1<<4)
-#define PIN_UPDATE	(1<<5)
-#define PIN_ZONE_4G	(1<<6)
-#define PIN_HIGH	(1<<7)
-#define PIN_OFFSET_FIXED	(1<<8)
+#define PIN_GLOBAL	(1<<0)
+#define PIN_USER	(1<<1)
+#define PIN_UPDATE	(1<<2)
+#define PIN_MAPPABLE	(1<<3)
+#define PIN_ZONE_4G	(1<<4)
+#define PIN_NONBLOCK	(1<<5)
+#define PIN_HIGH	(1<<6)
+#define PIN_OFFSET_BIAS	(1<<7)
+#define PIN_OFFSET_FIXED (1<<8)
 #define PIN_OFFSET_MASK (~4095)
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
-	vma->pin_count--;
+	vma->flags--;
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 495fb80edee0..9bbabc21d3e0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3484,38 +3484,41 @@ i915_vma_pin(struct i915_vma *vma,
 	     uint64_t alignment,
 	     uint64_t flags)
 {
-	unsigned bound = vma->bound;
+	unsigned bound;
 	int ret;
 
 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
 	GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
 
-	if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-		return -EBUSY;
-
 	/* Pin early to prevent the shrinker/eviction logic from destroying
 	 * our vma as we insert and bind.
 	 */
-	vma->pin_count++;
+	bound = vma->flags++;
+	if (WARN_ON((bound & 0xf) == (DRM_I915_GEM_OBJECT_MAX_PIN_COUNT-1))) {
+		ret = -EBUSY;
+		goto err;
+	}
 
-	if (!bound) {
+	if ((bound & 0xff) == 0) {
 		ret = i915_vma_insert(vma, size, alignment, flags);
 		if (ret)
 			goto err;
 	}
 
-	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
-	if (ret)
-		goto err;
+	if (~(bound >> 4) & (flags & (GLOBAL_BIND | LOCAL_BIND))) {
+		ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
+		if (ret)
+			goto err;
 
-	if ((bound ^ vma->bound) & GLOBAL_BIND)
-		__i915_vma_set_map_and_fenceable(vma);
+		if ((bound ^ vma->flags) & (GLOBAL_BIND << 4))
+			__i915_vma_set_map_and_fenceable(vma);
+	}
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
 	return 0;
 
 err:
-	vma->pin_count--;
+	__i915_vma_unpin(vma);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index be90d907f890..79dbd74b73c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,10 +34,10 @@
 #include <linux/dma_remapping.h>
 #include <linux/uaccess.h>
 
-#define  __EXEC_OBJECT_HAS_PIN (1<<31)
-#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
-#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
-#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define  __EXEC_OBJECT_HAS_PIN (1U<<31)
+#define  __EXEC_OBJECT_HAS_FENCE (1U<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1U<<29)
+#define  __EXEC_OBJECT_NEEDS_BIAS (1U<<28)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -253,7 +253,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 		i915_gem_object_unpin_fence(obj);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 
 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8877dc48f028..e6f64dcb2e77 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -185,13 +185,30 @@ struct i915_vma {
 
 	struct i915_gem_active last_read[I915_NUM_RINGS];
 
-	/** Flags and address space this VMA is bound to */
+	union {
+		struct {
+			/**
+			 * How many users have pinned this object in GTT space. The following
+			 * users can each hold at most one reference: pwrite/pread, execbuffer
+			 * (objects are not allowed multiple times for the same batchbuffer),
+			 * and the framebuffer code. When switching/pageflipping, the
+			 * framebuffer code has at most two buffers pinned per crtc.
+			 *
+			 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
+			 * bits with absolutely no headroom. So use 4 bits. */
+			unsigned int pin_count : 4;
+#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
+
+			/** Flags and address space this VMA is bound to */
 #define GLOBAL_BIND	(1<<0)
 #define LOCAL_BIND	(1<<1)
-	unsigned int bound : 4;
-	unsigned int active : I915_NUM_RINGS;
-	bool is_ggtt : 1;
-	bool closed : 1;
+			unsigned int bound : 4;
+			unsigned int active : I915_NUM_RINGS;
+			bool is_ggtt : 1;
+			bool closed : 1;
+		};
+		unsigned int flags;
+	};
 
 	/**
 	 * Support different GGTT views into the same object.
@@ -216,18 +233,6 @@ struct i915_vma {
 	struct hlist_node exec_node;
 	unsigned long exec_handle;
 	struct drm_i915_gem_exec_object2 *exec_entry;
-
-	/**
-	 * How many users have pinned this object in GTT space. The following
-	 * users can each hold at most one reference: pwrite/pread, execbuffer
-	 * (objects are not allowed multiple times for the same batchbuffer),
-	 * and the framebuffer code. When switching/pageflipping, the
-	 * framebuffer code has at most two buffers pinned per crtc.
-	 *
-	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
-	 * bits with absolutely no headroom. So use 4 bits. */
-	unsigned int pin_count:4;
-#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 };
 
 struct i915_page_dma {
-- 
2.7.0.rc3



More information about the Intel-gfx mailing list