[Intel-gfx] [PATCH] i915: support page flipping
Jesse Barnes
jbarnes at virtuousgeek.org
Thu Feb 26 23:35:39 CET 2009
On Thursday, February 26, 2009 1:56:52 pm Chris Wilson wrote:
> On Thu, 2009-02-26 at 13:28 -0800, Jesse Barnes wrote:
> > Add a new page flip ioctl to the i915 driver. The new ioctl takes a
> > drm_i915_gem_page_flip arg, which contains the target buffer object for
> > the flip, an offset into that buffer, and other buffer parameters to be
> > used for the flip.
> >
> > If a flip is already pending on the object in question, the ioctl waits
> > for it to finish first, then queues the flip. An optional wait flag
> > causes the ioctl to wait for the flip to complete before it returns.
> >
> > If an execbuffer containing an object with a pending flip comes in, it
> > will stall until the flip completes, to preserve glXSwapBuffers
> > semantics.
> >
> > Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
>
> Didn't do too bad in spotting the missing checks... ;-)
... but I just had to leave a few nuggets for you. :)
> This has annoyed me every time (but I'm running out of things I can
> complain about ;-), can we just say
> pending = !list_empty();
> and be done with the conditional.
Sure, fixed.
> > + obj_priv = obj->driver_private;
>
> Need to check obj_priv->stride&7 == 0. Hmm, that may be impossible
> anyway.
I've left that out, though I guess it wouldn't hurt.
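If it does get added later, note that it needs parens: == binds tighter
than & in C, so a check along these lines is what I'd write:

	if ((obj_priv->stride & 7) != 0)
		return -EINVAL;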
> > + vblank_ref_err = drm_vblank_get(dev, pipe);
> > + if (!vblank_ref_err) {
>
> Ok, this seems like a reasonable fallback. If vblank is not enabled then
> you just queue the flip without waiting.
Right, but I also forgot to unref in the pin failure case, so I've fixed that.
> > + ret = i915_gem_object_pin(obj, 0);
> > + if (ret) {
> > + DRM_ERROR("failed to pin object for flip\n");
> > + ret = -EBUSY;
>
> What's the rationale for changing the reported error here? (And it might
> be helpful to printk the original error value.)
Oh, I'm not sure what's going on there; I've changed it to just log and
return the pin failure.
> > + * Put the object in the GTT domain before the flip,
> > + * since there may be outstanding rendering
> > + */
> > + i915_gem_object_set_to_gtt_domain(obj, 0);
>
> Need to check, report and handle errors here. And yes, these do occur in
> the wild for some as of yet unknown reason.
Fixed. Mesa is now flushing before calling the server, though, so this should
happen less often.
> > + reqno = i915_add_request(dev, 0);
>
> Be consistent and call this seqno like the rest of the code, please.
Sure, fixed.
> > + /* Look up object handles */
> > + for (i = 0; i < args->buffer_count; i++) {
> > + object_list[i] = drm_gem_object_lookup(dev, file_priv,
> > + exec_list[i].handle);
> > + if (object_list[i] == NULL) {
> > + DRM_ERROR("Invalid object handle %d at index %d\n",
> > + exec_list[i].handle, i);
> > + ret = -EBADF;
> > + goto pre_mutex_err;
>
> Hmm, can't jump straight to pre_mutex_err as that means you leak the
> references on object already looked up. Take the mutex and goto err;
Ah yeah, missed that; looks like 'i' will be right if we jump there too.
Fixed.
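For reference, here's a rough sketch of how userspace might drive the new
ioctl, using a plain ioctl() call and the definitions added to i915_drm.h
below (flip_to() and the fd/handle setup are just illustrative):

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int flip_to(int fd, uint32_t handle, uint32_t pipe,
			   uint32_t width, uint32_t height)
	{
		struct drm_i915_gem_page_flip flip;

		memset(&flip, 0, sizeof(flip));
		flip.handle = handle;	/* new front buffer */
		flip.offset = 0;	/* flip to the start of the object */
		flip.pipe = pipe;	/* 0 or 1 */
		flip.x = width;		/* screen dimensions, used on 965+ */
		flip.y = height;
		/* block until the flip actually completes */
		flip.flags = I915_PAGE_FLIP_WAIT;

		return ioctl(fd, DRM_IOCTL_I915_GEM_PAGE_FLIP, &flip);
	}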
--
Jesse Barnes, Intel Open Source Technology Center
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2d797ff..050682a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -829,6 +829,178 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
return 0;
}
+static int i915_pipe_to_plane(struct drm_device *dev, int pipe)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ u32 reg, pipe_mask;
+
+ if (pipe == 0)
+ pipe_mask = DISPPLANE_SEL_PIPE_A;
+ else
+ pipe_mask = DISPPLANE_SEL_PIPE_B;
+
+ pipe_mask |= DISPLAY_PLANE_ENABLE;
+
+ reg = I915_READ(DSPACNTR);
+ if ((reg & (DISPLAY_PLANE_ENABLE | DISPPLANE_SEL_PIPE_MASK)) ==
+ pipe_mask)
+ return 0;
+
+ reg = I915_READ(DSPBCNTR);
+ if ((reg & (DISPLAY_PLANE_ENABLE | DISPPLANE_SEL_PIPE_MASK)) ==
+ pipe_mask)
+ return 1;
+
+ return -1;
+}
+
+bool
+i915_gem_flip_pending(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ unsigned long flags;
+ bool pending;
+
+ spin_lock_irqsave(&dev_priv->vblank_lock, flags);
+ pending = !list_empty(&obj_priv->vblank_head);
+ spin_unlock_irqrestore(&dev_priv->vblank_lock, flags);
+
+ return pending;
+}
+
+static int i915_gem_page_flip(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_i915_gem_page_flip *flip_data = data;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_gem_object *obj;
+ struct drm_i915_gem_object *obj_priv;
+ unsigned long flags;
+ uint32_t offset;
+ unsigned int pitch, pipe, tiled;
+ int plane, ret = 0, vblank_ref_err = 0;
+ uint32_t seqno;
+ RING_LOCALS;
+
+ if (!(dev->driver->driver_features & DRIVER_GEM))
+ return -ENODEV;
+
+ /*
+ * Reject unknown flags so future userspace knows what we (don't)
+ * support
+ */
+ if (flip_data->flags & (~I915_PAGE_FLIP_WAIT)) {
+ DRM_ERROR("bad page flip flags\n");
+ return -EINVAL;
+ }
+
+ if ((pipe = flip_data->pipe) > 1) {
+ DRM_ERROR("bad pipe\n");
+ return -EINVAL;
+ }
+
+ plane = i915_pipe_to_plane(dev, pipe);
+ if (plane < 0) {
+ DRM_ERROR("bad pipe (no planes enabled?)\n");
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(dev, file_priv, flip_data->handle);
+ if (obj == NULL)
+ return -EBADF;
+
+ /*
+ * Make sure the new buffer is in bounds
+ * FIXME: should probably check against current mode as well
+ */
+ if (flip_data->offset >= obj->size) {
+ DRM_ERROR("bad flip offset\n");
+ ret = -EINVAL;
+ goto out_unref_prelock;
+ }
+
+ obj_priv = obj->driver_private;
+
+ if (i915_gem_flip_pending(obj))
+ wait_for_completion(&obj_priv->vblank);
+
+ mutex_lock(&dev->struct_mutex);
+ if (obj_priv->tiling_mode != I915_TILING_NONE &&
+ obj_priv->tiling_mode != I915_TILING_X) {
+ DRM_ERROR("can only flip non-tiled or X tiled pages\n");
+ ret = -EINVAL;
+ goto out_unref;
+ }
+
+ vblank_ref_err = drm_vblank_get(dev, pipe);
+ if (!vblank_ref_err) {
+ ret = i915_gem_object_pin(obj, 0);
+ if (ret) {
+ drm_vblank_put(dev, pipe);
+ DRM_ERROR("failed to pin object for flip\n");
+ goto out_unref;
+ }
+ }
+
+ /*
+ * Put the object in the GTT domain before the flip,
+ * since there may be outstanding rendering
+ */
+ ret = i915_gem_object_set_to_gtt_domain(obj, 0);
+ if (ret) {
+ DRM_ERROR("move to GTT failed\n");
+ /* drop the pin and vblank ref taken above */
+ if (!vblank_ref_err) {
+ i915_gem_object_unpin(obj);
+ drm_vblank_put(dev, pipe);
+ }
+ goto out_unref;
+ }
+
+ offset = obj_priv->gtt_offset + flip_data->offset;
+
+ pitch = obj_priv->stride;
+ tiled = (obj_priv->tiling_mode == I915_TILING_X);
+
+ BEGIN_LP_RING(4);
+ OUT_RING(CMD_OP_DISPLAYBUFFER_INFO | (plane << 20));
+ OUT_RING(pitch);
+ if (IS_I965G(dev)) {
+ OUT_RING(offset | tiled);
+ OUT_RING(((flip_data->x - 1) << 16) | (flip_data->y - 1));
+ } else {
+ OUT_RING(offset);
+ OUT_RING(MI_NOOP);
+ }
+ ADVANCE_LP_RING();
+
+ seqno = i915_add_request(dev, 0);
+
+ mutex_unlock(&dev->struct_mutex);
+
+ /*
+ * This is a bit racy; the flip above may have already happened
+ * by the time we get here. If that happens, the new back buffer
+ * won't be available for rendering for one extra frame, since
+ * the vblank list won't have the object.
+ */
+ spin_lock_irqsave(&dev_priv->vblank_lock, flags);
+ if (!vblank_ref_err)
+ list_add_tail(&obj_priv->vblank_head,
+ &dev_priv->mm.vblank_list[pipe]);
+
+ obj_priv->flip_seqno = seqno;
+ spin_unlock_irqrestore(&dev_priv->vblank_lock, flags);
+
+ if (!vblank_ref_err && (flip_data->flags & I915_PAGE_FLIP_WAIT))
+ wait_for_completion(&obj_priv->vblank);
+
+out_unref_prelock:
+ /* Take lock again for unref */
+ mutex_lock(&dev->struct_mutex);
+out_unref:
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
/**
* i915_probe_agp - get AGP bootup configuration
* @pdev: PCI device
@@ -1307,6 +1479,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0),
DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0),
DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_I915_GEM_PAGE_FLIP, i915_gem_page_flip, DRM_AUTH|DRM_MASTER),
};
int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3951a12..148e80a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -159,6 +159,10 @@ typedef struct drm_i915_private {
u32 irq_mask_reg;
u32 pipestat[2];
+ /** Protects vblank list */
+ spinlock_t vblank_lock;
+ struct work_struct vblank_work;
+
int tex_lru_log_granularity;
int allow_batchbuffer;
struct mem_block *agp_heap;
@@ -338,6 +342,11 @@ typedef struct drm_i915_private {
*/
struct delayed_work retire_work;
+ /**
+ * List of objects waiting on vblank events (one per pipe)
+ */
+ struct list_head vblank_list[2];
+
uint32_t next_gem_seqno;
/**
@@ -389,6 +398,13 @@ struct drm_i915_gem_object {
/** This object's place on the active/flushing/inactive lists */
struct list_head list;
+ /** Object's place on the vblank list (protected by vblank_lock) */
+ struct list_head vblank_head;
+ /** sequence number for flip (when it passes the flip is done),
+ * protected by vblank lock
+ */
+ uint32_t flip_seqno;
+
/**
* This is set if the object is on the active or flushing lists
* (has pending rendering), and is not set if it's on inactive (ready
@@ -457,6 +473,9 @@ struct drm_i915_gem_object {
/** for phy allocated objects */
struct drm_i915_gem_phys_object *phys_obj;
+
+ /** for page flips and other vblank related blocks */
+ struct completion vblank;
};
/**
@@ -516,6 +535,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
extern int i915_emit_box(struct drm_device *dev,
struct drm_clip_rect __user *boxes,
int i, int DR1, int DR4);
+extern bool i915_gem_flip_pending(struct drm_gem_object *obj);
/* i915_irq.c */
extern int i915_irq_emit(struct drm_device *dev, void *data,
@@ -625,6 +645,9 @@ int i915_gem_attach_phys_object(struct drm_device *dev,
void i915_gem_detach_phys_object(struct drm_device *dev,
struct drm_gem_object *obj);
void i915_gem_free_all_phys_object(struct drm_device *dev);
+uint32_t i915_add_request(struct drm_device *dev, uint32_t flush_domains);
+int i915_seqno_passed(uint32_t seq1, uint32_t seq2);
+uint32_t i915_get_gem_seqno(struct drm_device *dev);
/* i915_gem_tiling.c */
void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3e025d5..f0996fb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -900,7 +900,7 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
*
* Returned sequence numbers are nonzero on success.
*/
-static uint32_t
+uint32_t
i915_add_request(struct drm_device *dev, uint32_t flush_domains)
{
drm_i915_private_t *dev_priv = dev->dev_private;
@@ -1028,7 +1028,7 @@ i915_gem_retire_request(struct drm_device *dev,
/**
* Returns true if seq1 is later than seq2.
*/
-static int
+int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
return (int32_t)(seq1 - seq2) >= 0;
@@ -2498,6 +2498,26 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
goto pre_mutex_err;
}
+ /* Look up object handles */
+ for (i = 0; i < args->buffer_count; i++) {
+ object_list[i] = drm_gem_object_lookup(dev, file_priv,
+ exec_list[i].handle);
+ if (object_list[i] == NULL) {
+ DRM_ERROR("Invalid object handle %d at index %d\n",
+ exec_list[i].handle, i);
+ ret = -EBADF;
+ mutex_lock(&dev->struct_mutex);
+ goto err;
+ }
+
+ if (i915_gem_flip_pending(object_list[i])) {
+ struct drm_i915_gem_object *obj_priv;
+
+ obj_priv = object_list[i]->driver_private;
+ wait_for_completion(&obj_priv->vblank);
+ }
+ }
+
mutex_lock(&dev->struct_mutex);
i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -2516,18 +2536,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
goto pre_mutex_err;
}
- /* Look up object handles */
- for (i = 0; i < args->buffer_count; i++) {
- object_list[i] = drm_gem_object_lookup(dev, file_priv,
- exec_list[i].handle);
- if (object_list[i] == NULL) {
- DRM_ERROR("Invalid object handle %d at index %d\n",
- exec_list[i].handle, i);
- ret = -EBADF;
- goto err;
- }
- }
-
/* Pin and relocate */
for (pin_tries = 0; ; pin_tries++) {
ret = 0;
@@ -2920,6 +2928,9 @@ int i915_gem_init_object(struct drm_gem_object *obj)
obj_priv->obj = obj;
obj_priv->fence_reg = I915_FENCE_REG_NONE;
INIT_LIST_HEAD(&obj_priv->list);
+ INIT_LIST_HEAD(&obj_priv->vblank_head);
+
+ init_completion(&obj_priv->vblank);
return 0;
}
@@ -2980,8 +2991,10 @@ int
i915_gem_idle(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_i915_gem_object *obj_priv, *tmp;
+ unsigned long irqflags;
uint32_t seqno, cur_seqno, last_seqno;
- int stuck, ret;
+ int stuck, ret, pipe;
mutex_lock(&dev->struct_mutex);
@@ -2995,10 +3008,25 @@ i915_gem_idle(struct drm_device *dev)
*/
dev_priv->mm.suspended = 1;
+ mutex_unlock(&dev->struct_mutex);
+
+ /* Wait for any outstanding flips */
+ spin_lock_irqsave(&dev_priv->vblank_lock, irqflags);
+ for (pipe = 0; pipe < 2; pipe++) {
+ list_for_each_entry_safe(obj_priv, tmp,
+ &dev_priv->mm.vblank_list[pipe],
+ vblank_head) {
+ spin_unlock_irqrestore(&dev_priv->vblank_lock, irqflags);
+ wait_for_completion(&obj_priv->vblank);
+ spin_lock_irqsave(&dev_priv->vblank_lock, irqflags);
+ }
+ }
+ spin_unlock_irqrestore(&dev_priv->vblank_lock, irqflags);
+
/* Cancel the retire work handler, wait for it to finish if running
*/
- mutex_unlock(&dev->struct_mutex);
cancel_delayed_work_sync(&dev_priv->mm.retire_work);
+
mutex_lock(&dev->struct_mutex);
i915_kernel_lost_context(dev);
@@ -3358,9 +3386,12 @@ i915_gem_load(struct drm_device *dev)
INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
INIT_LIST_HEAD(&dev_priv->mm.request_list);
+ INIT_LIST_HEAD(&dev_priv->mm.vblank_list[0]);
+ INIT_LIST_HEAD(&dev_priv->mm.vblank_list[1]);
INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
i915_gem_retire_work_handler);
dev_priv->mm.next_gem_seqno = 1;
+ spin_lock_init(&dev_priv->vblank_lock);
/* Old X drivers will take 0-2 for front, back, depth buffers */
dev_priv->fence_reg_start = 3;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 548ff2c..f50df88 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -187,6 +187,36 @@ u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe)
return I915_READ(reg);
}
+static void i915_vblank_work_func(struct work_struct *work)
+{
+ drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
+ vblank_work);
+ struct drm_device *dev = dev_priv->dev;
+ struct drm_i915_gem_object *obj_priv, *tmp;
+ unsigned long irqflags;
+ int pipe;
+ uint32_t cur_seqno;
+
+ mutex_lock(&dev->struct_mutex);
+
+ spin_lock_irqsave(&dev_priv->vblank_lock, irqflags);
+ for (pipe = 0; pipe < 2; pipe++) {
+ list_for_each_entry_safe(obj_priv, tmp,
+ &dev_priv->mm.vblank_list[pipe],
+ vblank_head) {
+ cur_seqno = i915_get_gem_seqno(dev);
+ if (i915_seqno_passed(cur_seqno,
+ obj_priv->flip_seqno)) {
+ list_del_init(&obj_priv->vblank_head);
+ drm_vblank_put(dev, pipe);
+ i915_gem_object_unpin(obj_priv->obj);
+ complete(&obj_priv->vblank);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&dev_priv->vblank_lock, irqflags);
+ mutex_unlock(&dev->struct_mutex);
+}
+
irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
{
struct drm_device *dev = (struct drm_device *) arg;
@@ -269,6 +299,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
drm_handle_vblank(dev, 1);
}
+ if (vblank)
+ schedule_work(&dev_priv->vblank_work);
+
if ((pipeb_stats & I915_LEGACY_BLC_EVENT_STATUS) ||
(iir & I915_ASLE_INTERRUPT))
opregion_asle_intr(dev);
@@ -533,6 +566,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
I915_WRITE(IMR, 0xffffffff);
I915_WRITE(IER, 0x0);
(void) I915_READ(IER);
+ INIT_WORK(&dev_priv->vblank_work, i915_vblank_work_func);
}
int i915_driver_irq_postinstall(struct drm_device *dev)
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 912cd52..1ac4ded 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -184,6 +184,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_GEM_GET_TILING 0x22
#define DRM_I915_GEM_GET_APERTURE 0x23
#define DRM_I915_GEM_MMAP_GTT 0x24
+#define DRM_I915_GEM_PAGE_FLIP 0x25
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -219,6 +220,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_SET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
#define DRM_IOCTL_I915_GEM_GET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
#define DRM_IOCTL_I915_GEM_GET_APERTURE	DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
+#define DRM_IOCTL_I915_GEM_PAGE_FLIP	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PAGE_FLIP, struct drm_i915_gem_page_flip)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -654,4 +656,30 @@ struct drm_i915_gem_get_aperture {
uint64_t aper_available_size;
};
+#define I915_PAGE_FLIP_WAIT (1<<0) /* block on page flip completion */
+
+struct drm_i915_gem_page_flip {
+ /** Handle of new front buffer */
+ uint32_t handle;
+
+ /** Offset into the object to use */
+ uint64_t offset;
+
+ /**
+ * page flip flags (wait on flip only for now)
+ */
+ uint32_t flags;
+
+ /**
+ * pipe to flip
+ */
+ uint32_t pipe;
+
+ /**
+ * screen dimensions for flip
+ */
+ uint32_t x;
+ uint32_t y;
+};
+
#endif /* _I915_DRM_H_ */