[Intel-gfx] [PATCH] i915: support page flipping

Chris Wilson chris at chris-wilson.co.uk
Thu Feb 26 22:56:52 CET 2009


On Thu, 2009-02-26 at 13:28 -0800, Jesse Barnes wrote:
> Add a new page flip ioctl to the i915 driver.  The new ioctl takes the new
> drm_i915_gem_page_flip arg, which contains a buffer object target for the
> flip, an offset into that buffer, and other buffer parameters to be used for
> the flip.
> 
> If a flip is already pending on the object in question, the ioctl waits for
> it to finish first, then queues the flip.  An optional wait flag causes the
> ioctl to wait for the flip to complete before it returns.
> 
> If an execbuffer containing an object with a pending flip comes in, it will
> stall until the flip completes, to preserve glXSwapBuffers semantics.
> 
> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>

Didn't do too bad in spotting the missing checks... ;-)

> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 2d797ff..0d6a6d3 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -829,6 +829,175 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> +static int i915_pipe_to_plane(struct drm_device *dev, int pipe)
> +{
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +	u32 reg, pipe_mask;
> +
> +	if (pipe == 0)
> +		pipe_mask = DISPPLANE_SEL_PIPE_A;
> +	else
> +		pipe_mask = DISPPLANE_SEL_PIPE_B;
> +
> +	pipe_mask |= DISPLAY_PLANE_ENABLE;
> +
> +	reg = I915_READ(DSPACNTR);
> +	if ((reg & (DISPLAY_PLANE_ENABLE | DISPPLANE_SEL_PIPE_MASK)) ==
> +	    pipe_mask)
> +		return 0;
> +
> +	reg = I915_READ(DSPBCNTR);
> +	if ((reg & (DISPLAY_PLANE_ENABLE | DISPPLANE_SEL_PIPE_MASK)) ==
> +	    pipe_mask)
> +		return 1;
> +
> +	return -1;
> +}
> +
> +bool
> +i915_gem_flip_pending(struct drm_gem_object *obj)
> +{
> +	struct drm_device *dev = obj->dev;
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_object *obj_priv = obj->driver_private;
> +	unsigned long flags;
> +	bool pending = false;
> +
> +	spin_lock_irqsave(&dev_priv->vblank_lock, flags);
> +	if (!list_empty(&obj_priv->vblank_head))
> +		pending = true;
This has annoyed me every time (but I'm running out of things I can
complain about ;-) — can we just say
  pending = !list_empty();
and be done with the conditional?

> +	spin_unlock_irqrestore(&dev_priv->vblank_lock, flags);
> +
> +	return pending;
> +}
> +
> +static int i915_gem_page_flip(struct drm_device *dev, void *data,
> +			      struct drm_file *file_priv)
> +{
> +	struct drm_i915_gem_page_flip *flip_data = data;
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +	struct drm_gem_object *obj;
> +	struct drm_i915_gem_object *obj_priv;
> +	unsigned long flags;
> +	uint32_t offset;
> +	unsigned int pitch, pipe, plane, tiled;
> +	int ret = 0, vblank_ref_err = 0, reqno;
> +	RING_LOCALS;
> +
> +	if (!(dev->driver->driver_features & DRIVER_GEM))
> +		return -ENODEV;
Guess I'll just have to comment this out to continue testing this
feature. :-)

> +
> +	/*
> +	 * Reject unknown flags so future userspace knows what we (don't)
> +	 * support
> +	 */
> +	if (flip_data->flags & (~I915_PAGE_FLIP_WAIT)) {
> +		DRM_ERROR("bad page flip flags\n");
> +		return -EINVAL;
> +	}
> +
> +	if ((pipe = flip_data->pipe) > 1) {
> +		DRM_ERROR("bad pipe\n");
> +		return -EINVAL;
> +	}
> +
> +	plane = i915_pipe_to_plane(dev, pipe);
> +	if (plane < 0) {
> +		DRM_ERROR("bad pipe (no planes enabled?)\n");
> +		return -EINVAL;
> +	}
> +
> +	obj = drm_gem_object_lookup(dev, file_priv, flip_data->handle);
> +	if (obj == NULL)
> +		return -EBADF;
> +
> +	/*
> +	 * Make sure the new buffer is in bounds
> +	 * FIXME: should probably check against current mode as well
> +	 */
> +	if (flip_data->offset >= obj->size) {
> +		DRM_ERROR("bad flip offset\n");
> +		ret = -EINVAL;
> +		goto out_unref_prelock;
> +	}
> +
> +	obj_priv = obj->driver_private;

Need to check obj_priv->stride&7 == 0. Hmm, that may be impossible
anyway.

> +
> +	if (i915_gem_flip_pending(obj))
> +		wait_for_completion(&obj_priv->vblank);
> +
> +	mutex_lock(&dev->struct_mutex);
> +	if (obj_priv->tiling_mode != I915_TILING_NONE &&
> +	    obj_priv->tiling_mode != I915_TILING_X) {
> +		DRM_ERROR("can only flip non-tiled or X tiled pages\n");
> +		ret = -EINVAL;
> +		goto out_unref;
> +	}
> +
> +	vblank_ref_err = drm_vblank_get(dev, pipe);
> +	if (!vblank_ref_err) {
Ok, this seems like a reasonable fallback. If vblank is not enabled then
you just queue the flip without waiting.

> +		ret = i915_gem_object_pin(obj, 0);
> +		if (ret) {
> +			DRM_ERROR("failed to pin object for flip\n");
> +			ret = -EBUSY;
What's the rationale for changing the reported error here? (And it might
be helpful to printk the original error value.)

> +			goto out_unref;
> +		}
> +	}
> +
> +	/*
> +	 * Put the object in the GTT domain before the flip,
> +	 * since there may be outstanding rendering
> +	 */
> +	i915_gem_object_set_to_gtt_domain(obj, 0);
Need to check, report and handle errors here. And yes, these do occur in
the wild for some as-yet-unknown reason.

> +
> +	offset = obj_priv->gtt_offset + flip_data->offset;
> +
> +	pitch = obj_priv->stride;
> +	tiled = (obj_priv->tiling_mode == I915_TILING_X);
> +
> +	BEGIN_LP_RING(4);
> +	OUT_RING(CMD_OP_DISPLAYBUFFER_INFO | (plane << 20));
> +	OUT_RING(pitch);
> +	if (IS_I965G(dev)) {
> +		OUT_RING(offset | tiled);
> +		OUT_RING(((flip_data->x - 1) << 16) | (flip_data->y - 1));
> +	} else {
> +		OUT_RING(offset);
> +		OUT_RING(MI_NOOP);
> +	}
> +	ADVANCE_LP_RING();
> +
> +	reqno = i915_add_request(dev, 0);
Be consistent and call this seqno like the rest of the code, please.

> +
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	/*
> +	 * This is a bit racy; the flip above may have already happened
> +	 * by the time we get here.  If that happens, the new back buffer
> +	 * won't be available for rendering for one extra frame, since
> +	 * the vblank list won't have the object.
> +	 */
> +	spin_lock_irqsave(&dev_priv->vblank_lock, flags);
> +	if (!vblank_ref_err)
> +		list_add_tail(&obj_priv->vblank_head,
> +			      &dev_priv->mm.vblank_list[pipe]);
> +
> +	obj_priv->flip_seqno = reqno;
> +	spin_unlock_irqrestore(&dev_priv->vblank_lock, flags);
> +
> +	if (!vblank_ref_err && (flip_data->flags & I915_PAGE_FLIP_WAIT))
> +		wait_for_completion(&obj_priv->vblank);
> +
> +out_unref_prelock:
> +	/* Take lock again for unref */
> +	mutex_lock(&dev->struct_mutex);
> +out_unref:
> +	drm_gem_object_unreference(obj);
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	return ret;
> +}
> +
>  /**
>   * i915_probe_agp - get AGP bootup configuration
>   * @pdev: PCI device
> @@ -1307,6 +1476,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0),
>  	DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0),
>  	DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0),
> +	DRM_IOCTL_DEF(DRM_I915_GEM_PAGE_FLIP, i915_gem_page_flip, DRM_AUTH|DRM_MASTER),
>  };
>  
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3951a12..148e80a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -159,6 +159,10 @@ typedef struct drm_i915_private {
>  	u32 irq_mask_reg;
>  	u32 pipestat[2];
>  
> +	/** Protects vblank list */
> +	spinlock_t vblank_lock;
> +	struct work_struct vblank_work;
> +
>  	int tex_lru_log_granularity;
>  	int allow_batchbuffer;
>  	struct mem_block *agp_heap;
> @@ -338,6 +342,11 @@ typedef struct drm_i915_private {
>  		 */
>  		struct delayed_work retire_work;
>  
> +		/**
> +		 * List of objects waiting on vblank events (one per pipe)
> +		 */
> +		struct list_head vblank_list[2];
> +
>  		uint32_t next_gem_seqno;
>  
>  		/**
> @@ -389,6 +398,13 @@ struct drm_i915_gem_object {
>  	/** This object's place on the active/flushing/inactive lists */
>  	struct list_head list;
>  
> +	/** Object's place on the vblank list (protected by vblank_lock)*/
> +	struct list_head vblank_head;
> +	/** sequence number for flip (when it passes the flip is done),
> +	 *  protected by vblank lock
> +	 */
> +	int flip_seqno;
> +
>  	/**
>  	 * This is set if the object is on the active or flushing lists
>  	 * (has pending rendering), and is not set if it's on inactive (ready
> @@ -457,6 +473,9 @@ struct drm_i915_gem_object {
>  
>  	/** for phy allocated objects */
>  	struct drm_i915_gem_phys_object *phys_obj;
> +
> +	/** for page flips and other vblank related blocks */
> +	struct completion vblank;
>  };
>  
>  /**
> @@ -516,6 +535,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
>  extern int i915_emit_box(struct drm_device *dev,
>  			 struct drm_clip_rect __user *boxes,
>  			 int i, int DR1, int DR4);
> +extern bool i915_gem_flip_pending(struct drm_gem_object *obj);
>  
>  /* i915_irq.c */
>  extern int i915_irq_emit(struct drm_device *dev, void *data,
> @@ -625,6 +645,9 @@ int i915_gem_attach_phys_object(struct drm_device *dev,
>  void i915_gem_detach_phys_object(struct drm_device *dev,
>  				 struct drm_gem_object *obj);
>  void i915_gem_free_all_phys_object(struct drm_device *dev);
> +uint32_t i915_add_request(struct drm_device *dev, uint32_t flush_domains);
> +int i915_seqno_passed(uint32_t seq1, uint32_t seq2);
> +uint32_t i915_get_gem_seqno(struct drm_device *dev);
>  
>  /* i915_gem_tiling.c */
>  void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 3e025d5..5270d25 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -900,7 +900,7 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
>   *
>   * Returned sequence numbers are nonzero on success.
>   */
> -static uint32_t
> +uint32_t
>  i915_add_request(struct drm_device *dev, uint32_t flush_domains)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> @@ -1028,7 +1028,7 @@ i915_gem_retire_request(struct drm_device *dev,
>  /**
>   * Returns true if seq1 is later than seq2.
>   */
> -static int
> +int
>  i915_seqno_passed(uint32_t seq1, uint32_t seq2)
>  {
>  	return (int32_t)(seq1 - seq2) >= 0;
> @@ -2498,6 +2498,25 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
>  		goto pre_mutex_err;
>  	}
>  
> +	/* Look up object handles */
> +	for (i = 0; i < args->buffer_count; i++) {
> +		object_list[i] = drm_gem_object_lookup(dev, file_priv,
> +						       exec_list[i].handle);
> +		if (object_list[i] == NULL) {
> +			DRM_ERROR("Invalid object handle %d at index %d\n",
> +				   exec_list[i].handle, i);
> +			ret = -EBADF;
> +			goto pre_mutex_err;
Hmm, can't jump straight to pre_mutex_err, as that means you leak the
references on objects already looked up. Take the mutex and goto err;

> +		}
> +
> +		if (i915_gem_flip_pending(object_list[i])) {
> +			struct drm_i915_gem_object *obj_priv;
> +
> +			obj_priv = object_list[i]->driver_private;
> +			wait_for_completion(&obj_priv->vblank);
> +		}
> +	}
> +
>  	mutex_lock(&dev->struct_mutex);
>  
>  	i915_verify_inactive(dev, __FILE__, __LINE__);
> @@ -2516,18 +2535,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
>  		goto pre_mutex_err;
>  	}
>  
> -	/* Look up object handles */
> -	for (i = 0; i < args->buffer_count; i++) {
> -		object_list[i] = drm_gem_object_lookup(dev, file_priv,
> -						       exec_list[i].handle);
> -		if (object_list[i] == NULL) {
> -			DRM_ERROR("Invalid object handle %d at index %d\n",
> -				   exec_list[i].handle, i);
> -			ret = -EBADF;
> -			goto err;
> -		}
> -	}
> -
>  	/* Pin and relocate */
>  	for (pin_tries = 0; ; pin_tries++) {
>  		ret = 0;
> @@ -2920,6 +2927,9 @@ int i915_gem_init_object(struct drm_gem_object *obj)
>  	obj_priv->obj = obj;
>  	obj_priv->fence_reg = I915_FENCE_REG_NONE;
>  	INIT_LIST_HEAD(&obj_priv->list);
> +	INIT_LIST_HEAD(&obj_priv->vblank_head);
> +
> +	init_completion(&obj_priv->vblank);
>  
>  	return 0;
>  }
> @@ -2980,8 +2990,10 @@ int
>  i915_gem_idle(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_object *obj_priv, *tmp;
> +	unsigned long irqflags;
>  	uint32_t seqno, cur_seqno, last_seqno;
> -	int stuck, ret;
> +	int stuck, ret, pipe;
>  
>  	mutex_lock(&dev->struct_mutex);
>  
> @@ -2995,10 +3007,25 @@ i915_gem_idle(struct drm_device *dev)
>  	 */
>  	dev_priv->mm.suspended = 1;
>  
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	/* Wait for any outstanding flips */
> +	spin_lock_irqsave(&dev_priv->vblank_lock, irqflags);
> +	for (pipe = 0; pipe < 2; pipe++) {
> +		list_for_each_entry_safe(obj_priv, tmp,
> +					 &dev_priv->mm.vblank_list[pipe],
> +					 vblank_head) {
> +			spin_unlock_irqrestore(&dev_priv->vblank_lock, irqflags);
> +			wait_for_completion(&obj_priv->vblank);
> +			spin_lock_irqsave(&dev_priv->vblank_lock, irqflags);
> +		}
> +	}
> +	spin_unlock_irqrestore(&dev_priv->vblank_lock, irqflags);
> +
>  	/* Cancel the retire work handler, wait for it to finish if running
>  	 */
> -	mutex_unlock(&dev->struct_mutex);
>  	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
> +
>  	mutex_lock(&dev->struct_mutex);
>  
>  	i915_kernel_lost_context(dev);
> @@ -3358,9 +3385,12 @@ i915_gem_load(struct drm_device *dev)
>  	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.request_list);
> +	INIT_LIST_HEAD(&dev_priv->mm.vblank_list[0]);
> +	INIT_LIST_HEAD(&dev_priv->mm.vblank_list[1]);
>  	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
>  			  i915_gem_retire_work_handler);
>  	dev_priv->mm.next_gem_seqno = 1;
> +	spin_lock_init(&dev_priv->vblank_lock);
>  
>  	/* Old X drivers will take 0-2 for front, back, depth buffers */
>  	dev_priv->fence_reg_start = 3;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 548ff2c..f50df88 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -187,6 +187,36 @@ u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe)
>  	return I915_READ(reg);
>  }
>  
> +static void i915_vblank_work_func(struct work_struct *work)
> +{
> +	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> +						    vblank_work);
> +	struct drm_device *dev = dev_priv->dev;
> +	struct drm_i915_gem_object *obj_priv, *tmp;
> +	unsigned long irqflags;
> +	int pipe, cur_seqno;
> +
> +	mutex_lock(&dev->struct_mutex);
> +
> +	spin_lock_irqsave(&dev_priv->vblank_lock, irqflags);
> +	for (pipe = 0; pipe < 2; pipe++) {
> +		list_for_each_entry_safe(obj_priv, tmp,
> +					 &dev_priv->mm.vblank_list[pipe],
> +					 vblank_head) {
> +			cur_seqno = i915_get_gem_seqno(dev);
> +			if (i915_seqno_passed(cur_seqno,
> +					      obj_priv->flip_seqno)) {
> +				list_del_init(&obj_priv->vblank_head);
> +				drm_vblank_put(dev, pipe);
> +				i915_gem_object_unpin(obj_priv->obj);
> +				complete(&obj_priv->vblank);
> +			}
> +		}
> +	}
> +	spin_unlock_irqrestore(&dev_priv->vblank_lock, irqflags);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
>  irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
>  {
>  	struct drm_device *dev = (struct drm_device *) arg;
> @@ -269,6 +299,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
>  			drm_handle_vblank(dev, 1);
>  		}
>  
> +		if (vblank)
> +			schedule_work(&dev_priv->vblank_work);
> +
>  		if ((pipeb_stats & I915_LEGACY_BLC_EVENT_STATUS) ||
>  		    (iir & I915_ASLE_INTERRUPT))
>  			opregion_asle_intr(dev);
> @@ -533,6 +566,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
>  	I915_WRITE(IMR, 0xffffffff);
>  	I915_WRITE(IER, 0x0);
>  	(void) I915_READ(IER);
> +	INIT_WORK(&dev_priv->vblank_work, i915_vblank_work_func);
>  }
>  
>  int i915_driver_irq_postinstall(struct drm_device *dev)
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index 912cd52..1ac4ded 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -184,6 +184,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_GET_TILING	0x22
>  #define DRM_I915_GEM_GET_APERTURE 0x23
>  #define DRM_I915_GEM_MMAP_GTT	0x24
> +#define DRM_I915_GEM_PAGE_FLIP	0x25
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -219,6 +220,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_SET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct 
> drm_i915_gem_set_tiling)
>  #define DRM_IOCTL_I915_GEM_GET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct 
> drm_i915_gem_get_tiling)
>  #define DRM_IOCTL_I915_GEM_GET_APERTURE	DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct 
> drm_i915_gem_get_aperture)
> +#define DRM_IOCTL_I915_GEM_PAGE_FLIP	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PAGE_FLIP, struct 
> drm_i915_gem_page_flip)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -654,4 +656,30 @@ struct drm_i915_gem_get_aperture {
>  	uint64_t aper_available_size;
>  };
>  
> +#define I915_PAGE_FLIP_WAIT (1<<0) /* block on page flip completion */
> +
> +struct drm_i915_gem_page_flip {
> +	/** Handle of new front buffer */
> +	uint32_t handle;
> +
> +	/** Offset into the object to use */
> +	uint64_t offset;
> +
> +	/**
> +	 * page flip flags (wait on flip only for now)
> +	 */
> +	uint32_t flags;
> +
> +	/**
> +	 * pipe to flip
> +	 */
> +	uint32_t pipe;
> +
> +	/**
> +	 * screen dimensions for flip
> +	 */
> +	uint32_t x;
> +	uint32_t y;
> +};
> +
>  #endif				/* _I915_DRM_H_ */
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx




More information about the Intel-gfx mailing list