[PATCH 16/18] drm/i915: Forgo last_fence active request tracking
Chris Wilson
chris at chris-wilson.co.uk
Wed Jun 5 07:36:08 UTC 2019
We were using the last_fence to track the last request that used this
vma that might be interpreted by a fence register and forced ourselves
to wait for this request before modifying any fence register that
overlapped our vma. Due to requirement that we need to track any XY_BLT
command, linear or tiled, this in effect meant that we have to track the
vma for its active lifespan anyway, so we can forgo the explicit
last_fence tracking and just use the whole vma->active.
Another solution would be to pipeline the register updates, and would
help resolve some long running stalls for gen3 (but only gen 2 and 3!)
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_reset.c | 2 +-
drivers/gpu/drm/i915/gvt/gvt.h | 2 +-
drivers/gpu/drm/i915/i915_debugfs.c | 24 ++++------
drivers/gpu/drm/i915/i915_drv.c | 4 +-
drivers/gpu/drm/i915/i915_drv.h | 5 +-
drivers/gpu/drm/i915/i915_gem.c | 58 ++---------------------
drivers/gpu/drm/i915/i915_gem_fence_reg.c | 46 ++++++++++++++++--
drivers/gpu/drm/i915/i915_gem_gtt.c | 1 -
drivers/gpu/drm/i915/i915_gpu_error.c | 9 ++--
drivers/gpu/drm/i915/i915_vma.c | 11 -----
drivers/gpu/drm/i915/i915_vma.h | 1 -
drivers/gpu/drm/i915/intel_device_info.h | 1 +
12 files changed, 66 insertions(+), 98 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 847e32c8d7dc..604d7ed4c931 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -695,7 +695,7 @@ static void revoke_mmaps(struct drm_i915_private *i915)
{
int i;
- for (i = 0; i < i915->num_fence_regs; i++) {
+ for (i = 0; i < RUNTIME_INFO(i915)->num_fences; i++) {
struct drm_vma_offset_node *node;
struct i915_vma *vma;
u64 vma_offset;
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index b54f2bdc13a4..06be876c4d2b 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -390,7 +390,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt);
#define gvt_hidden_gmadr_end(gvt) (gvt_hidden_gmadr_base(gvt) \
+ gvt_hidden_sz(gvt) - 1)
-#define gvt_fence_sz(gvt) (gvt->dev_priv->num_fence_regs)
+#define gvt_fence_sz(gvt) (RUNTIME_INFO((gvt)->dev_priv)->num_fences)
/* Aperture/GM space definitions for vGPU */
#define vgpu_aperture_offset(vgpu) ((vgpu)->gm.low_gm_node.start)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4a7f7ef1c9ea..f5562198cec0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -217,9 +217,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
}
}
if (vma->fence)
- seq_printf(m, " , fence: %d%s",
- vma->fence->id,
- i915_active_request_isset(&vma->last_fence) ? "*" : "");
+ seq_printf(m, " , fence: %d", vma->fence->id);
seq_puts(m, ")");
}
if (obj->stolen)
@@ -698,20 +696,17 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
{
- struct drm_i915_private *dev_priv = node_to_i915(m->private);
- struct drm_device *dev = &dev_priv->drm;
- int i, ret;
-
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- return ret;
+ struct drm_i915_private *i915 = node_to_i915(m->private);
+ int num_fences = RUNTIME_INFO(i915)->num_fences;
+ int i;
- seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
- for (i = 0; i < dev_priv->num_fence_regs; i++) {
- struct i915_vma *vma = dev_priv->fence_regs[i].vma;
+ seq_printf(m, "Total fences = %d\n", num_fences);
+ rcu_read_lock();
+ for (i = 0; i < num_fences; i++) {
+ struct i915_vma *vma = READ_ONCE(i915->fence_regs[i].vma);
seq_printf(m, "Fence %d, pin count = %d, object = ",
- i, dev_priv->fence_regs[i].pin_count);
+ i, i915->fence_regs[i].pin_count);
if (!vma)
seq_puts(m, "unused");
else
@@ -719,7 +714,6 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
seq_putc(m, '\n');
}
- mutex_unlock(&dev->struct_mutex);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1af6751e1b36..189a5c46c715 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -350,7 +350,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = pdev->revision;
break;
case I915_PARAM_NUM_FENCES_AVAIL:
- value = dev_priv->num_fence_regs;
+ value = RUNTIME_INFO(dev_priv)->num_fences;
break;
case I915_PARAM_HAS_OVERLAY:
value = dev_priv->overlay ? 1 : 0;
@@ -1625,7 +1625,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
intel_uncore_sanitize(dev_priv);
intel_gt_init_workarounds(dev_priv);
- i915_gem_load_init_fences(dev_priv);
+ i915_gem_init_fences(dev_priv);
/* On the 945G/GM, the chipset reports the MSI capability on the
* integrated graphics even though the support isn't actually there
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e15cd9d3064..c8ed8027e3f5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1482,8 +1482,7 @@ struct drm_i915_private {
/* protects panel power sequencer state */
struct mutex pps_mutex;
- struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
- int num_fence_regs; /* 8 on pre-965, 16 otherwise */
+ struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
unsigned int fsb_freq, mem_freq, is_ddr3;
unsigned int skl_preferred_vco_freq;
@@ -2536,7 +2535,7 @@ void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
void i915_gem_sanitize(struct drm_i915_private *i915);
int i915_gem_init_early(struct drm_i915_private *dev_priv);
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
-void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
+void i915_gem_init_fences(struct drm_i915_private *dev_priv);
int i915_gem_freeze(struct drm_i915_private *dev_priv);
int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c9507651fa30..8773db8a07ba 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -886,7 +886,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
{
struct drm_i915_gem_object *obj, *on;
- int i;
/*
* Only called during RPM suspend. All users of the userfault_list
@@ -899,30 +898,13 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
&dev_priv->mm.userfault_list, userfault_link)
__i915_gem_object_release_mmap(obj);
- /* The fence will be lost when the device powers down. If any were
+ /*
+ * The fence will be lost when the device powers down. If any were
* in use by hardware (i.e. they are pinned), we should not be powering
* down! All other fences will be reacquired by the user upon waking.
+ *
+ * On resume, we call i915_gem_eestore_fences() to reset the registers.
*/
- for (i = 0; i < dev_priv->num_fence_regs; i++) {
- struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-
- /* Ideally we want to assert that the fence register is not
- * live at this point (i.e. that no piece of code will be
- * trying to write through fence + GTT, as that both violates
- * our tracking of activity and associated locking/barriers,
- * but also is illegal given that the hw is powered down).
- *
- * Previously we used reg->pin_count as a "liveness" indicator.
- * That is not sufficient, and we need a more fine-grained
- * tool if we want to have a sanity check here.
- */
-
- if (!reg->vma)
- continue;
-
- GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
- reg->dirty = true;
- }
}
static int wait_for_engines(struct drm_i915_private *i915)
@@ -1720,38 +1702,6 @@ void i915_gem_init_mmio(struct drm_i915_private *i915)
i915_gem_sanitize(i915);
}
-void
-i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
-{
- int i;
-
- if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
- !IS_CHERRYVIEW(dev_priv))
- dev_priv->num_fence_regs = 32;
- else if (INTEL_GEN(dev_priv) >= 4 ||
- IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
- IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
- dev_priv->num_fence_regs = 16;
- else
- dev_priv->num_fence_regs = 8;
-
- if (intel_vgpu_active(dev_priv))
- dev_priv->num_fence_regs =
- I915_READ(vgtif_reg(avail_rs.fence_num));
-
- /* Initialize fence registers to zero */
- for (i = 0; i < dev_priv->num_fence_regs; i++) {
- struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
-
- fence->i915 = dev_priv;
- fence->id = i;
- list_add_tail(&fence->link, &dev_priv->mm.fence_list);
- }
- i915_gem_restore_fences(dev_priv);
-
- i915_gem_detect_bit_6_swizzle(dev_priv);
-}
-
static void i915_gem_init__mm(struct drm_i915_private *i915)
{
spin_lock_init(&i915->mm.obj_lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 10aa6e350bfa..707b164109b8 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -24,6 +24,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
+#include "i915_pvinfo.h"
#include "i915_scatterlist.h"
/**
@@ -229,16 +230,14 @@ static int fence_update(struct drm_i915_fence_reg *fence,
i915_gem_object_get_tiling(vma->obj)))
return -EINVAL;
- ret = i915_active_request_retire(&vma->last_fence,
- &vma->obj->base.dev->struct_mutex);
+ ret = i915_active_wait(&vma->active);
if (ret)
return ret;
}
old = xchg(&fence->vma, NULL);
if (old) {
- ret = i915_active_request_retire(&old->last_fence,
- &old->obj->base.dev->struct_mutex);
+ ret = i915_active_wait(&fence->vma->active);
if (ret) {
fence->vma = old;
return ret;
@@ -461,7 +460,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
int i;
rcu_read_lock(); /* keep obj alive as we dereference */
- for (i = 0; i < i915->num_fence_regs; i++) {
+ for (i = 0; i < RUNTIME_INFO(i915)->num_fences; i++) {
struct drm_i915_fence_reg *reg = &i915->fence_regs[i];
struct i915_vma *vma = READ_ONCE(reg->vma);
@@ -798,3 +797,40 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
i++;
}
}
+
+void i915_gem_init_fences(struct drm_i915_private *dev_priv)
+{
+ int num_fences;
+ int i;
+
+ i915_gem_detect_bit_6_swizzle(dev_priv);
+
+ if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
+ !IS_CHERRYVIEW(dev_priv))
+ num_fences = 32;
+ else if (INTEL_GEN(dev_priv) >= 4 ||
+ IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
+ IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
+ num_fences = 16;
+ else
+ num_fences = 8;
+
+ if (intel_vgpu_active(dev_priv))
+ num_fences = I915_READ(vgtif_reg(avail_rs.fence_num));
+
+ GEM_BUG_ON(num_fences > ARRAY_SIZE(dev_priv->fence_regs));
+ num_fences = min_t(int, num_fences, ARRAY_SIZE(dev_priv->fence_regs));
+
+ /* Initialize fence registers to zero */
+ for (i = 0; i < num_fences; i++) {
+ struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+
+ fence->i915 = dev_priv;
+ fence->id = i;
+ list_add_tail(&fence->link, &dev_priv->mm.fence_list);
+
+ fence_write(fence, NULL);
+ }
+
+ RUNTIME_INFO(dev_priv)->num_fences = num_fences;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6fcf702d7ec1..56503b4ee217 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2062,7 +2062,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
return ERR_PTR(-ENOMEM);
i915_active_init(i915, &vma->active, NULL);
- INIT_ACTIVE_REQUEST(&vma->last_fence);
vma->vm = &ggtt->vm;
vma->ops = &pd_vma_ops;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 67f56b4a4e3a..dc89021d102a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1123,19 +1123,20 @@ static u32 i915_error_generate_code(struct i915_gpu_state *error,
static void gem_record_fences(struct i915_gpu_state *error)
{
struct drm_i915_private *dev_priv = error->i915;
+ int num_fences = RUNTIME_INFO(dev_priv)->num_fences;
int i;
if (INTEL_GEN(dev_priv) >= 6) {
- for (i = 0; i < dev_priv->num_fence_regs; i++)
+ for (i = 0; i < num_fences; i++)
error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
} else if (INTEL_GEN(dev_priv) >= 4) {
- for (i = 0; i < dev_priv->num_fence_regs; i++)
+ for (i = 0; i < num_fences; i++)
error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
} else {
- for (i = 0; i < dev_priv->num_fence_regs; i++)
+ for (i = 0; i < num_fences; i++)
error->fence[i] = I915_READ(FENCE_REG(i));
}
- error->nfence = i;
+ error->nfence = num_fences;
}
static void error_record_engine_registers(struct i915_gpu_state *error,
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index a3cb08f602f9..e1e3fe9eec19 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -138,7 +138,6 @@ vma_create(struct drm_i915_gem_object *obj,
vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
- INIT_ACTIVE_REQUEST(&vma->last_fence);
INIT_LIST_HEAD(&vma->closed_link);
@@ -956,9 +955,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
}
obj->read_domains |= I915_GEM_GPU_DOMAINS;
- if (flags & EXEC_OBJECT_NEEDS_FENCE)
- __i915_active_request_set(&vma->last_fence, rq);
-
export_fence(vma, rq, flags);
return 0;
}
@@ -989,14 +985,7 @@ int i915_vma_unbind(struct i915_vma *vma)
* before we are finished).
*/
__i915_vma_pin(vma);
-
ret = i915_active_wait(&vma->active);
- if (ret)
- goto unpin;
-
- ret = i915_active_request_retire(&vma->last_fence,
- &vma->vm->i915->drm.struct_mutex);
-unpin:
__i915_vma_unpin(vma);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 07ed81ed898b..9cddc9e94b2f 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -109,7 +109,6 @@ struct i915_vma {
#define I915_VMA_GGTT_WRITE BIT(14)
struct i915_active active;
- struct i915_active_request last_fence;
/**
* Support different GGTT views into the same object.
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index d67dedf0cbd8..bea4835aab3f 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -203,6 +203,7 @@ struct intel_runtime_info {
u8 num_scalers[I915_MAX_PIPES];
u8 num_engines;
+ u8 num_fences;
/* Slice/subslice/EU info */
struct sseu_dev_info sseu;
--
2.20.1
More information about the Intel-gfx-trybot
mailing list