[PATCH 3/5] pipelined-fence
Chris Wilson
chris at chris-wilson.co.uk
Wed Jun 28 12:28:38 UTC 2017
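Emit the fence register update along with the request that requires it,
as a MI_LOAD_REGISTER_IMM from the ring, rather than stalling to write
the register via mmio before execution begins. Execbuf now only
reserves a fence register upfront (i915_vma_reserve_fence) and defers
the actual write until request construction
(i915_vma_emit_pipelined_fence). The last pipelined write is tracked on
the register itself (drm_i915_fence_reg.pipelined) so that later mmio
updates, and CPU access through the fence, first wait for it to
complete.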
---
drivers/gpu/drm/i915/i915_gem.c | 1 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 ++-
drivers/gpu/drm/i915/i915_gem_fence_reg.c | 193 +++++++++++++++++++++++++----
drivers/gpu/drm/i915/i915_gem_fence_reg.h | 1 +
drivers/gpu/drm/i915/i915_vma.c | 1 +
drivers/gpu/drm/i915/i915_vma.h | 4 +
6 files changed, 186 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3d2ae70a6ec2..eecc154f6e1b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4896,6 +4896,7 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
fence->i915 = dev_priv;
fence->id = i;
list_add_tail(&fence->link, &dev_priv->mm.fence_list);
+ init_request_active(&fence->pipelined, NULL);
}
i915_gem_restore_fences(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0648da739215..718bb75ad387 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -359,11 +359,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
return;
if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
- if (unlikely(i915_vma_pin_fence(vma))) {
+ if (unlikely(i915_vma_reserve_fence(vma))) {
i915_vma_unpin(vma);
return;
}
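+ /* The fence register write will be emitted from the request
+ * itself and must be ordered against all prior access to the
+ * object, so do not skip the implicit request serialisation.
+ */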
+ entry->flags &= ~EXEC_OBJECT_ASYNC;
if (vma->fence)
entry->flags |= __EXEC_OBJECT_HAS_FENCE;
}
@@ -558,12 +559,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
GEM_BUG_ON(eb_vma_misplaced(entry, vma));
if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
- err = i915_vma_pin_fence(vma);
+ err = i915_vma_reserve_fence(vma);
if (unlikely(err)) {
i915_vma_unpin(vma);
return err;
}
+ entry->flags &= ~EXEC_OBJECT_ASYNC;
if (vma->fence)
entry->flags |= __EXEC_OBJECT_HAS_FENCE;
}
@@ -1842,6 +1844,12 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
if (unlikely(obj->cache_dirty && !obj->cache_coherent))
i915_gem_clflush_object(obj, 0);
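+ /* Emit any required fence register update into the ring ahead
+ * of the batch, from within the same request.
+ */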
+ if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ err = i915_vma_emit_pipelined_fence(vma, eb->request);
+ if (err)
+ return err;
+ }
+
err = i915_gem_request_await_object
(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
if (err)
@@ -1926,9 +1934,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
obj->base.read_domains = 0;
}
obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
-
- if (flags & EXEC_OBJECT_NEEDS_FENCE)
- i915_gem_active_set(&vma->last_fence, req);
}
static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index a2806e76395d..0d69e79d2227 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -55,10 +55,9 @@
* CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
*/
-#define pipelined 0
-
-static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
+static int i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma,
+ struct drm_i915_gem_request *pipelined)
{
i915_reg_t fence_reg_lo, fence_reg_hi;
int fence_pitch_shift;
@@ -110,11 +109,30 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
I915_WRITE(fence_reg_hi, upper_32_bits(val));
I915_WRITE(fence_reg_lo, lower_32_bits(val));
POSTING_READ(fence_reg_lo);
+ } else {
+ u32 *cs;
+
+ cs = intel_ring_begin(pipelined, 8);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
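+ /* As with the mmio path, clear the low dword before updating
+ * so that the 64-bit register is never partially visible to
+ * the GPU while it is being rewritten.
+ */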
+ *cs++ = MI_LOAD_REGISTER_IMM(3);
+ *cs++ = i915_mmio_reg_offset(fence_reg_lo);
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(fence_reg_hi);
+ *cs++ = upper_32_bits(val);
+ *cs++ = i915_mmio_reg_offset(fence_reg_lo);
+ *cs++ = lower_32_bits(val);
+ *cs++ = MI_NOOP;
+ intel_ring_advance(pipelined, cs);
}
+
+ return 0;
}
-static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
+static int i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma,
+ struct drm_i915_gem_request *pipelined)
{
u32 val;
@@ -150,11 +168,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
I915_WRITE(reg, val);
POSTING_READ(reg);
+ } else {
+ u32 *cs;
+
+ cs = intel_ring_begin(pipelined, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
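+ /* Gen3 packs the fence into a single 32-bit register, so one
+ * LRI write suffices; the MI_NOOP pads the emission to an even
+ * number of dwords.
+ */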
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
+ *cs++ = val;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(pipelined, cs);
}
+
+ return 0;
}
-static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
+static int i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma,
+ struct drm_i915_gem_request *pipelined)
{
u32 val;
@@ -182,29 +215,49 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
I915_WRITE(reg, val);
POSTING_READ(reg);
+ } else {
+ u32 *cs;
+
+ cs = intel_ring_begin(pipelined, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
+ *cs++ = val;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(pipelined, cs);
}
+
+ return 0;
}
-static void fence_write(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
+static int fence_write(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma,
+ struct drm_i915_gem_request *rq)
{
+ int err;
+
/* Previous access through the fence register is marshalled by
* the mb() inside the fault handlers (i915_gem_release_mmaps)
* and explicitly managed for internal users.
*/
if (IS_GEN2(fence->i915))
- i830_write_fence_reg(fence, vma);
+ err = i830_write_fence_reg(fence, vma, rq);
else if (IS_GEN3(fence->i915))
- i915_write_fence_reg(fence, vma);
+ err = i915_write_fence_reg(fence, vma, rq);
else
- i965_write_fence_reg(fence, vma);
+ err = i965_write_fence_reg(fence, vma, rq);
+ if (err)
+ return err;
/* Access through the fenced region afterwards is
* ordered by the posting reads whilst writing the registers.
*/
fence->dirty = false;
+ return 0;
}
static int fence_update(struct drm_i915_fence_reg *fence,
@@ -212,17 +265,15 @@ static int fence_update(struct drm_i915_fence_reg *fence,
{
int ret;
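+ /* The register may have an update still in flight on the GPU;
+ * wait for it to complete before touching the fence via mmio.
+ */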
+ ret = i915_gem_active_retire(&fence->pipelined,
+ &fence->i915->drm.struct_mutex);
+ if (ret)
+ return ret;
+
if (vma) {
if (!i915_vma_is_map_and_fenceable(vma))
return -EINVAL;
- if (WARN(!i915_gem_object_get_stride(vma->obj) ||
- !i915_gem_object_get_tiling(vma->obj),
- "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
- i915_gem_object_get_stride(vma->obj),
- i915_gem_object_get_tiling(vma->obj)))
- return -EINVAL;
-
ret = i915_gem_active_retire(&vma->last_fence,
&vma->obj->base.dev->struct_mutex);
if (ret)
@@ -253,7 +304,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
* to the runtime resume, see i915_gem_restore_fences().
*/
if (intel_runtime_pm_get_if_in_use(fence->i915)) {
- fence_write(fence, vma);
+ fence_write(fence, vma, NULL);
intel_runtime_pm_put(fence->i915);
}
@@ -290,6 +341,8 @@ int i915_vma_put_fence(struct i915_vma *vma)
if (fence->pin_count)
return -EBUSY;
+ GEM_BUG_ON(fence->vma != vma);
+
return fence_update(fence, NULL);
}
@@ -342,10 +395,15 @@ i915_vma_pin_fence(struct i915_vma *vma)
assert_rpm_wakelock_held(vma->vm->i915);
/* Just update our place in the LRU if our fence is getting reused. */
- if (vma->fence) {
- fence = vma->fence;
+ fence = vma->fence;
+ if (fence) {
fence->pin_count++;
if (!fence->dirty) {
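+ /* Before the fence can be reused for CPU access, any
+ * pipelined update to the register must have completed.
+ */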
+ err = i915_gem_active_retire(&fence->pipelined,
+ &fence->i915->drm.struct_mutex);
+ if (err)
+ goto err_unpin;
+
list_move_tail(&fence->link,
&fence->i915->mm.fence_list);
return 0;
@@ -369,6 +427,93 @@ i915_vma_pin_fence(struct i915_vma *vma)
return err;
}
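+
+/*
+ * Reserve a fence register for use by the vma during execbuf, but defer
+ * programming the register itself to i915_vma_emit_pipelined_fence().
+ * Stealing a register here only marks it dirty and revokes the previous
+ * owner's GTT mmaps; no hardware access is performed.
+ */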
+int i915_vma_reserve_fence(struct i915_vma *vma)
+{
+ struct drm_i915_fence_reg *fence;
+
+ lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+ GEM_BUG_ON(!i915_vma_is_pinned(vma));
+
+ fence = vma->fence;
+ if (!fence) {
+ if (!i915_gem_object_is_tiled(vma->obj))
+ return 0;
+
+ if (!i915_vma_is_map_and_fenceable(vma))
+ return -EINVAL;
+
+ fence = fence_find(vma->vm->i915);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+
+ vma->fence = fence;
+
+ if (fence->vma) {
+ i915_gem_release_mmap(fence->vma->obj);
+ fence->vma->fence = NULL;
+ }
+ fence->vma = vma;
+ fence->dirty = true;
+ }
+ fence->pin_count++;
+ list_move_tail(&fence->link, &fence->i915->mm.fence_list);
+
+ GEM_BUG_ON(!i915_gem_object_is_tiled(vma->obj));
+ GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+ GEM_BUG_ON(vma->node.size != vma->fence_size);
+ GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_alignment));
+
+ return 0;
+}
+
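+/*
+ * Emit the deferred update for the reserved fence register into @rq:
+ * wait for any previous pipelined write to the register and, if the
+ * register is dirty, for the last fenced access through the vma, then
+ * write the new value from the ring and track @rq as the register's
+ * pipelined owner.
+ */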
+int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
+ struct drm_i915_gem_request *rq)
+{
+ struct drm_i915_fence_reg *fence = vma->fence;
+ struct drm_i915_gem_request *prev;
+ int err;
+
+ lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+ GEM_BUG_ON(fence && !fence->pin_count);
+
+ if (!fence)
+ goto out;
+
+ prev = i915_gem_active_raw(&fence->pipelined,
+ &fence->i915->drm.struct_mutex);
+ if (prev) {
+ err = i915_gem_request_await_dma_fence(rq, &prev->fence);
+ if (err)
+ return err;
+ }
+
+ if (!fence->dirty)
+ goto out;
+
+ GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+
+ if (fence->vma) {
+ prev = i915_gem_active_raw(&fence->vma->last_fence,
+ &fence->i915->drm.struct_mutex);
+ if (prev) {
+ err = i915_gem_request_await_dma_fence(rq,
+ &prev->fence);
+ if (err)
+ return err;
+ }
+ }
+
+ err = fence_write(fence, vma, rq);
+ if (err)
+ return err;
+
+ i915_gem_active_set(&fence->pipelined, rq);
+out:
+ i915_gem_active_set(&vma->last_fence, rq);
+ return 0;
+}
+
/**
* i915_gem_revoke_fences - revoke fence state
* @dev_priv: i915 device private
@@ -422,7 +567,7 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
vma = NULL;
}
- fence_write(reg, vma);
+ fence_write(reg, vma, NULL);
reg->vma = vma;
}
}
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index 99a31ded4dfd..ce45972fc5c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -47,6 +47,7 @@ struct drm_i915_fence_reg {
* command (such as BLT on gen2/3), as a "fence".
*/
bool dirty;
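+ /*
+ * The last request to update this register from the ring;
+ * mmio access and reuse must wait for its completion.
+ */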
+ struct i915_gem_active pipelined;
};
#endif
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index efbfee8eac99..0c489090d4ab 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -728,6 +728,7 @@ int i915_vma_unbind(struct i915_vma *vma)
__i915_vma_iounmap(vma);
vma->flags &= ~I915_VMA_CAN_FENCE;
}
+ GEM_BUG_ON(vma->fence);
if (likely(!vma->vm->closed)) {
trace_i915_vma_unbind(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 19f58af4f1bf..f0dc6eaebeab 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -367,5 +367,9 @@ i915_vma_unpin_fence(struct i915_vma *vma)
__i915_vma_unpin_fence(vma);
}
+int __must_check i915_vma_reserve_fence(struct i915_vma *vma);
+int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
+ struct drm_i915_gem_request *rq);
+
#endif
--
2.13.1