[PATCH 4/4] drm/vmwgfx: Rework locking around register accesses
Thomas Hellstrom
thellstrom at vmware.com
Fri Oct 30 02:42:46 PDT 2015
Relax the required locking to, instead of disabling irqs, disable tasklets.
Allow the caller to perform the locking using utility functions, which
allows the caller to combine a number of register accesses within the
same critical section. Implement this for capability reads and command
buffer submission.
Signed-off-by: Thomas Hellstrom <thellstrom at vmware.com>
Reviewed-by: Sinclair Yeh <syeh at vmware.com>
---
drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 8 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 9 +--
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 130 ++++++++++++++++++++++++++++-----
drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 6 +-
drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 40 +++++++---
drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 20 ++---
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 2 +-
7 files changed, 163 insertions(+), 52 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
index 8a76821..b8d2436 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -290,18 +290,20 @@ void vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header)
*/
static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header)
{
- struct vmw_cmdbuf_man *man = header->man;
+ struct vmw_private *dev_priv = header->man->dev_priv;
u32 val;
if (sizeof(header->handle) > 4)
val = (header->handle >> 32);
else
val = 0;
- vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_COMMAND_HIGH, val);
val = (header->handle & 0xFFFFFFFFULL);
val |= header->cb_context & SVGA_CB_CONTEXT_MASK;
- vmw_write(man->dev_priv, SVGA_REG_COMMAND_LOW, val);
+ __vmw_write(dev_priv, SVGA_REG_COMMAND_LOW, val);
+ vmw_hw_unlock(dev_priv, true);
return header->cb_header->status;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index a09cf85..2a5496a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -631,7 +631,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
ttm_lock_init(&dev_priv->reservation_sem);
spin_lock_init(&dev_priv->hw_lock);
spin_lock_init(&dev_priv->waiter_lock);
- spin_lock_init(&dev_priv->cap_lock);
spin_lock_init(&dev_priv->svga_lock);
for (i = vmw_res_context; i < vmw_res_max; ++i) {
@@ -854,10 +853,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
}
if (dev_priv->has_mob) {
- spin_lock(&dev_priv->cap_lock);
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX);
- dev_priv->has_dx = !!vmw_read(dev_priv, SVGA_REG_DEV_CAP);
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX);
+ dev_priv->has_dx = !!__vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+ vmw_hw_unlock(dev_priv, true);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a8ae9df..f7dcbcc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -385,7 +385,6 @@ struct vmw_private {
bool has_gmr;
bool has_mob;
spinlock_t hw_lock;
- spinlock_t cap_lock;
bool has_dx;
/*
@@ -546,34 +545,127 @@ static inline struct vmw_master *vmw_master(struct drm_master *master)
return (struct vmw_master *) master->driver_priv;
}
-/*
- * The locking here is fine-grained, so that it is performed once
- * for every read- and write operation. This is of course costly, but we
- * don't perform much register access in the timing critical paths anyway.
- * Instead we have the extra benefit of being sure that we don't forget
- * the hw lock around register accesses.
+/**
+ * vmw_hw_lock - Perform necessary locking for register access
+ *
+ * @dev_priv: Pointer to device private structure
+ * @lock_bh: Disable bottom halves. Always set to true unless irqs are disabled.
*/
-static inline void vmw_write(struct vmw_private *dev_priv,
- unsigned int offset, uint32_t value)
+static inline void vmw_hw_lock(struct vmw_private *dev_priv,
+ bool lock_bh)
{
- unsigned long irq_flags;
+ if (lock_bh)
+ spin_lock_bh(&dev_priv->hw_lock);
+ else
+ spin_lock(&dev_priv->hw_lock);
+}
- spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
+/**
+ * vmw_hw_unlock - Perform necessary unlocking after register access
+ *
+ * @dev_priv: Pointer to device private structure
+ * @unlock_bh: Re-enable bottom halves. Always set to true if it was set to true
+ * in the corresponding lock.
+ */
+static inline void vmw_hw_unlock(struct vmw_private *dev_priv,
+ bool unlock_bh)
+{
+ if (unlock_bh)
+ spin_unlock_bh(&dev_priv->hw_lock);
+ else
+ spin_unlock(&dev_priv->hw_lock);
+}
+
+/**
+ * __vmw_write - Write a value to a device register with external locking.
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * Note that the caller must ensure that when calling this function,
+ * @dev_priv::hw_lock must be held and that we're safe against racing
+ * against tasklets.
+ */
+static inline void __vmw_write(struct vmw_private *dev_priv,
+ unsigned int offset, uint32_t value)
+{
+#ifdef CONFIG_LOCKDEP
+ lockdep_assert_held_once(&dev_priv->hw_lock.rlock);
+ WARN_ON_ONCE(!(in_softirq() || irqs_disabled()));
+#endif
outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT);
- spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
}
-static inline uint32_t vmw_read(struct vmw_private *dev_priv,
- unsigned int offset)
+/**
+ * __vmw_read - Read a value from a device register with external locking.
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * Returns: The value read from the register.
+ *
+ * Note that the caller must ensure that when calling this function,
+ * @dev_priv::hw_lock must be held and that we're safe against racing
+ * against tasklets.
+ */
+static inline uint32_t __vmw_read(struct vmw_private *dev_priv,
+ unsigned int offset)
{
- unsigned long irq_flags;
u32 val;
- spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
+#ifdef CONFIG_LOCKDEP
+ lockdep_assert_held_once(&dev_priv->hw_lock.rlock);
+ WARN_ON_ONCE(!(in_softirq() || irqs_disabled()));
+#endif
outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT);
- spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
+
+ return val;
+}
+
+/**
+ * vmw_write - Write a value to a device register
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * This function may not be called when irqs are disabled, since
+ * its call to vmw_hw_unlock will re-enable bottom halves.
+ */
+static inline void vmw_write(struct vmw_private *dev_priv,
+ unsigned int offset, uint32_t value)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(irqs_disabled());
+#endif
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, offset, value);
+ vmw_hw_unlock(dev_priv, true);
+}
+
+/**
+ * vmw_read - Read a value from a device register
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * Returns: The value read from the register.
+ *
+ * This function may not be called when irqs are disabled, since
+ * its call to vmw_hw_unlock will re-enable bottom halves.
+ */
+static inline uint32_t vmw_read(struct vmw_private *dev_priv,
+ unsigned int offset)
+{
+ u32 val;
+
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(irqs_disabled());
+#endif
+ vmw_hw_lock(dev_priv, true);
+ val = __vmw_read(dev_priv, offset);
+ vmw_hw_unlock(dev_priv, true);
return val;
}
@@ -722,8 +814,8 @@ extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes);
extern void vmw_fifo_commit_flush(struct vmw_private *dev_priv, uint32_t bytes);
extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
uint32_t *seqno);
-extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason);
-extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
+extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason,
+ bool lock_bh);
extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index f40c36e..ac66aad 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -149,7 +149,7 @@ static bool vmw_fence_enable_signaling(struct fence *f)
if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
return false;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, false);
return true;
}
@@ -183,7 +183,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
if (likely(vmw_fence_obj_signaled(fence)))
return timeout;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
vmw_seqno_waiter_add(dev_priv);
spin_lock_bh(f->lock);
@@ -525,7 +525,7 @@ void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
{
struct vmw_private *dev_priv = fman_from_fence(fence)->dev_priv;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
}
static void vmw_fence_destroy(struct vmw_fence_obj *fence)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index a8baf5f..a70cc88 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -49,10 +49,10 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
if (!dev_priv->has_mob)
return false;
- spin_lock(&dev_priv->cap_lock);
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
- result = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
+ result = __vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+ vmw_hw_unlock(dev_priv, true);
return (result != 0);
}
@@ -163,14 +163,32 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
return 0;
}
-void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
+/**
+ * vmw_fifo_ping_host - Notify the device that there are device commands
+ * to process
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @reason: A valid reason to ping the host. See device headers.
+ * @lock_bh: Always set to true unless irqs are disabled in which case
+ * it must be set to false.
+ *
+ * Aka the device "doorbell". The device has an optimization that allows
+ * avoiding calling the relatively expensive register write to
+ * SVGA_REG_SYNC if the SVGA_FIFO_BUSY mmio register is set to 1, meaning
+ * that the doorbell has already been called. The SVGA_FIFO_BUSY mmio register
+ * will be cleared by the device when a new write to SVGA_REG_SYNC is needed
+ * to notify the device of pending commands.
+ */
+void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason,
+ bool lock_bh)
{
u32 *fifo_mem = dev_priv->mmio_virt;
- preempt_disable();
- if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0)
- vmw_write(dev_priv, SVGA_REG_SYNC, reason);
- preempt_enable();
+ if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) {
+ vmw_hw_lock(dev_priv, lock_bh);
+ __vmw_write(dev_priv, SVGA_REG_SYNC, reason);
+ vmw_hw_unlock(dev_priv, lock_bh);
+ }
}
void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
@@ -256,7 +274,7 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
if (likely(!vmw_fifo_is_full(dev_priv, bytes)))
return 0;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL, true);
if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
return vmw_fifo_wait_noirq(dev_priv, bytes,
interruptible, timeout);
@@ -490,7 +508,7 @@ static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
vmw_mmio_write(0, fifo_mem + SVGA_FIFO_RESERVED);
mb();
up_write(&fifo_state->rwsem);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
mutex_unlock(&fifo_state->fifo_mutex);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index b8c6a03..8ce0886 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -159,14 +159,14 @@ static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce,
(pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32);
compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS;
- spin_lock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
for (i = 0; i < max_size; ++i) {
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
compat_cap->pairs[i][0] = i;
compat_cap->pairs[i][1] = vmw_mask_multisample
- (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP));
+ (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP));
}
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_unlock(dev_priv, true);
return 0;
}
@@ -216,13 +216,13 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
if (num > SVGA3D_DEVCAP_MAX)
num = SVGA3D_DEVCAP_MAX;
- spin_lock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
for (i = 0; i < num; ++i) {
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
*bounce32++ = vmw_mask_multisample
- (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP));
+ (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP));
}
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_unlock(dev_priv, true);
} else if (gb_objects) {
ret = vmw_fill_compat_cap(dev_priv, bounce, size);
if (unlikely(ret != 0))
@@ -420,7 +420,7 @@ unsigned int vmw_fops_poll(struct file *filp, struct poll_table_struct *wait)
struct vmw_private *dev_priv =
vmw_priv(file_priv->minor->dev);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
return drm_poll(filp, wait);
}
@@ -443,6 +443,6 @@ ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
struct vmw_private *dev_priv =
vmw_priv(file_priv->minor->dev);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
return drm_read(filp, buffer, count, offset);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 12aaed7..af6970b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -246,7 +246,7 @@ int vmw_wait_seqno(struct vmw_private *dev_priv,
if (likely(vmw_seqno_passed(dev_priv, seqno)))
return 0;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE))
return vmw_fallback_wait(dev_priv, lazy, true, seqno,
--
2.4.3
More information about the dri-devel
mailing list