[Intel-gfx] [PATCH] add fence register management to execbuf

Jesse Barnes jbarnes at virtuousgeek.org
Tue Jan 27 02:10:45 CET 2009


On Friday, January 23, 2009 4:25 pm Eric Anholt wrote:
> On Fri, 2009-01-23 at 14:12 -0800, Jesse Barnes wrote:
> > Adds code to set up fence registers at execbuf time on pre-965 chips
> > as
> > necessary.  Also fixes up a few bugs in the pre-965 tile register
> > support
> > (get_order != ffs).  The number of fences available to the kernel
> > defaults
> > to the hw limit minus 3 (for legacy X front/back/depth), but a new
> > parameter
> > allows userspace to override that as needed.
> >
> > We should probably be smarter here, and make sure the all the buffers
> > are
> > covered by fences before hanging the chip, and there may be other
> > steps
> > required in the fence eviction code (iirc Eric mentioned that some
> > additional
> > flushing may be required).
>
> Looks pretty good, and thanks for the flushing reminder.  Basically we
> need to flush the gpu write domain before waiting for rendering (or
> we'll bug!), and we need to clear the read domain flags (or the next
> person to come along after setting up their new fence register may get
> stale speculated garbage from reads that occurred while the fence reg
> wasn't in place).
>
> > --
> > Jesse Barnes, Intel Open Source Technology Center
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c
> > b/drivers/gpu/drm/i915/i915_gem.c
> > index 96316fd..41749cd 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1445,21 +1445,27 @@ static void i915_write_fence_reg(struct
> > drm_i915_fence_reg *reg) drm_i915_private_t *dev_priv = dev->dev_private;
> >  	struct drm_i915_gem_object *obj_priv = obj->driver_private;
> >  	int regnum = obj_priv->fence_reg;
> > +	int tile_width = 512;
> >  	uint32_t val;
> >  	uint32_t pitch_val;
> >
> >  	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
> >  	    (obj_priv->gtt_offset & (obj->size - 1))) {
> > -		WARN(1, "%s: object not 1M or size aligned\n", __func__);
> > +		WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n",
> > +		     __func__, obj_priv->gtt_offset, obj->size);
> >  		return;
> >  	}
> >
> >  	if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) ||
> >  						       IS_I945GM(dev) ||
> >  						       IS_G33(dev)))
> > -		pitch_val = (obj_priv->stride / 128) - 1;
> > -	else
> > -		pitch_val = (obj_priv->stride / 512) - 1;
> > +		tile_width = 128;
> > +
> > +	pitch_val = obj_priv->stride / tile_width;
> > +	WARN(pitch_val & (pitch_val - 1),
> > +	     "pitch value not a power of two tile widths\n");
>
> SET_TILING ioctl should just reject the tiling request if a bad stride
> or a bad tiling type for a given chipset was passed in, so we don't have
> to validate and WARN here.
>
> Hmm, actually all the fence management code probably ought to live in
> i915_gem_tiling.c.  Something for the next kernel, I guess.

Ok, here's what I've got based on the comments above.  It looked like there
was already to do what we wanted in the out of fence regs path, so I used
it (i915_gem_object_set_to_gtt_domain seems to do what we want).  And I moved
the tile stride checks to set_tiling time and tried to make things clear
there.


-- 
Jesse Barnes, Intel Open Source Technology Center

Adds code to set up fence registers at execbuf time on pre-965 chips as
necessary.  Also fixes up a few bugs in the pre-965 tile register support
(get_order != ffs).  The number of fences available to the kernel defaults
to the hw limit minus 3 (for legacy X front/back/depth), but a new parameter
allows userspace to override that as needed.

Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e219ef7..f4c3f65 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -731,6 +731,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_GEM:
 		value = dev_priv->has_gem;
 		break;
+	case I915_PARAM_NUM_FENCES_AVAIL:
+		value = dev_priv->num_fence_regs - dev_priv->fence_reg_start;
+		break;
 	default:
 		DRM_ERROR("Unknown parameter %d\n", param->param);
 		return -EINVAL;
@@ -764,6 +767,13 @@ static int i915_setparam(struct drm_device *dev, void *data,
 	case I915_SETPARAM_ALLOW_BATCHBUFFER:
 		dev_priv->allow_batchbuffer = param->value;
 		break;
+	case I915_SETPARAM_NUM_USED_FENCES:
+		if (param->value > dev_priv->num_fence_regs ||
+		    param->value < 0)
+			return -EINVAL;
+		/* Userspace can use first N regs */
+		dev_priv->fence_reg_start = param->value;
+		break;
 	default:
 		DRM_ERROR("unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 899c92f..814f38f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 					   unsigned alignment);
-static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
+static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_evict_something(struct drm_device *dev);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
@@ -567,6 +567,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	pgoff_t page_offset;
 	unsigned long pfn;
 	int ret = 0;
+	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 
 	/* We don't use vmf->pgoff since that has the fake offset */
 	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
@@ -586,7 +587,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/* Need a new fence register? */
 	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
 	    obj_priv->tiling_mode != I915_TILING_NONE)
-		i915_gem_object_get_fence_reg(obj);
+		i915_gem_object_get_fence_reg(obj, write);
 
 	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
 		page_offset;
@@ -1445,21 +1446,25 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int regnum = obj_priv->fence_reg;
+	int tile_width = 512;
 	uint32_t val;
 	uint32_t pitch_val;
 
 	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
 	    (obj_priv->gtt_offset & (obj->size - 1))) {
-		WARN(1, "%s: object not 1M or size aligned\n", __func__);
+		WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n",
+		     __func__, obj_priv->gtt_offset, obj->size);
 		return;
 	}
 
 	if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) ||
 						       IS_I945GM(dev) ||
 						       IS_G33(dev)))
-		pitch_val = (obj_priv->stride / 128) - 1;
-	else
-		pitch_val = (obj_priv->stride / 512) - 1;
+		tile_width = 128;
+
+	/* Note: pitch better be a power of two tile widths */
+	pitch_val = obj_priv->stride / tile_width;
+	pitch_val = ffs(pitch_val) - 1;
 
 	val = obj_priv->gtt_offset;
 	if (obj_priv->tiling_mode == I915_TILING_Y)
@@ -1483,7 +1488,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 
 	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
 	    (obj_priv->gtt_offset & (obj->size - 1))) {
-		WARN(1, "%s: object not 1M or size aligned\n", __func__);
+		WARN(1, "%s: object 0x%08x not 1M or size aligned\n",
+		     __func__, obj_priv->gtt_offset);
 		return;
 	}
 
@@ -1503,6 +1509,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 /**
  * i915_gem_object_get_fence_reg - set up a fence reg for an object
  * @obj: object to map through a fence reg
+ * @write: object is about to be written
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
@@ -1514,7 +1521,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
  * and tiling format.
  */
 static void
-i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
 {
 	struct drm_device *dev = obj->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1527,12 +1534,18 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 		WARN(1, "allocating a fence for non-tiled object?\n");
 		break;
 	case I915_TILING_X:
-		WARN(obj_priv->stride & (512 - 1),
-		     "object is X tiled but has non-512B pitch\n");
+		if (!obj_priv->stride)
+			return;
+		WARN((obj_priv->stride & (512 - 1)),
+		     "object 0x%08x is X tiled but has non-512B pitch\n",
+		     obj_priv->gtt_offset);
 		break;
 	case I915_TILING_Y:
-		WARN(obj_priv->stride & (128 - 1),
-		     "object is Y tiled but has non-128B pitch\n");
+		if (!obj_priv->stride)
+			return;
+		WARN((obj_priv->stride & (128 - 1)),
+		     "object 0x%08x is Y tiled but has non-128B pitch\n",
+		     obj_priv->gtt_offset);
 		break;
 	}
 
@@ -1563,9 +1576,9 @@ try_again:
 		 * objects to finish before trying again.
 		 */
 		if (i == dev_priv->num_fence_regs) {
-			ret = i915_gem_object_wait_rendering(reg->obj);
+			ret = i915_gem_object_set_to_gtt_domain(reg->obj, write);
 			if (ret) {
-				WARN(ret, "wait_rendering failed: %d\n", ret);
+				WARN(ret, "switch to GTT domain failed: %d\n", ret);
 				return;
 			}
 			goto try_again;
@@ -1631,7 +1644,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 	if (dev_priv->mm.suspended)
 		return -EBUSY;
 	if (alignment == 0)
-		alignment = PAGE_SIZE;
+		alignment = i915_gem_get_gtt_alignment(obj);
 	if (alignment & (PAGE_SIZE - 1)) {
 		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
 		return -EINVAL;
@@ -2652,6 +2665,14 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
 				DRM_ERROR("Failure to bind: %d", ret);
 			return ret;
 		}
+		/*
+		 * Pre-965 chips need a fence register set up in order to
+		 * properly handle tiled surfaces.
+		 */
+		if (!IS_I965G(dev) &&
+		    obj_priv->fence_reg == I915_FENCE_REG_NONE &&
+		    obj_priv->tiling_mode != I915_TILING_NONE)
+			i915_gem_object_get_fence_reg(obj, true);
 	}
 	obj_priv->pin_count++;
 
@@ -3291,7 +3312,7 @@ i915_gem_load(struct drm_device *dev)
 	/* Old X drivers will take 0-2 for front, back, depth buffers */
 	dev_priv->fence_reg_start = 3;
 
-	if (IS_I965G(dev))
+	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 		dev_priv->num_fence_regs = 16;
 	else
 		dev_priv->num_fence_regs = 8;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 241f39b..6847518 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -173,6 +173,36 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
 }
 
+/* Check pitch constriants for all chips & tiling formats */
+static bool
+i915_tiling_ok(struct drm_device *dev, int stride, int tiling_mode)
+{
+	int tile_width = 512;
+
+	/* Linear is always fine */
+	if (tiling_mode == I915_TILING_NONE)
+		return true;
+
+	if (tiling_mode == I915_TILING_Y)
+		tile_width = 128;
+
+	/* 965+ just needs multiples of tile width */
+	if (IS_I965G(dev)) {
+		if (stride & (tile_width - 1))
+			return false;
+		return true;
+	}
+
+	/* Pre-965 needs power of two tile widths */
+	if (stride < tile_width)
+		return false;
+
+	if (stride & (stride - 1))
+		return false;
+
+	return true;
+}
+
 /**
  * Sets the tiling mode of an object, returning the required swizzling of
  * bit 6 of addresses in the object.
@@ -191,6 +221,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		return -EINVAL;
 	obj_priv = obj->driver_private;
 
+	if (!i915_tiling_ok(dev, args->stride, args->tiling_mode))
+		return -EINVAL;
+
 	mutex_lock(&dev->struct_mutex);
 
 	if (args->tiling_mode == I915_TILING_NONE) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8f632b9..7b2f9c3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -186,12 +186,12 @@
 #define FENCE_REG_830_0			0x2000
 #define   I830_FENCE_START_MASK		0x07f80000
 #define   I830_FENCE_TILING_Y_SHIFT	12
-#define   I830_FENCE_SIZE_BITS(size)	((get_order(size >> 19) - 1) << 8)
+#define   I830_FENCE_SIZE_BITS(size)	((ffs((size) >> 19) - 1) << 8)
 #define   I830_FENCE_PITCH_SHIFT	4
 #define   I830_FENCE_REG_VALID		(1<<0)
 
 #define   I915_FENCE_START_MASK		0x0ff00000
-#define   I915_FENCE_SIZE_BITS(size)	((get_order(size >> 20) - 1) << 8)
+#define   I915_FENCE_SIZE_BITS(size)	((ffs((size) >> 20) - 1) << 8)
 
 #define FENCE_REG_965_0			0x03000
 #define   I965_FENCE_PITCH_SHIFT	2
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index b3bcf72..912cd52 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -261,6 +261,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_LAST_DISPATCH         3
 #define I915_PARAM_CHIPSET_ID            4
 #define I915_PARAM_HAS_GEM               5
+#define I915_PARAM_NUM_FENCES_AVAIL      6
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -272,6 +273,7 @@ typedef struct drm_i915_getparam {
 #define I915_SETPARAM_USE_MI_BATCHBUFFER_START            1
 #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY             2
 #define I915_SETPARAM_ALLOW_BATCHBUFFER                   3
+#define I915_SETPARAM_NUM_USED_FENCES                     4
 
 typedef struct drm_i915_setparam {
 	int param;




More information about the Intel-gfx mailing list